Repository: LiDan456/MAD-GANs Branch: master Commit: 3139a73a4112 Files: 45 Total size: 249.0 KB Directory structure: gitextract_fejrc7rh/ ├── .gitattributes ├── AD.py ├── AD_Invert.py ├── DR_discriminator.py ├── README.md ├── RGAN.py ├── data_utils.py ├── differential_privacy/ │ ├── dp_sgd/ │ │ └── dp_optimizer/ │ │ ├── dp_optimizer.py │ │ ├── sanitizer.py │ │ └── utils.py │ └── privacy_accountant/ │ └── tf/ │ └── accountant.py ├── eugenium_mmd.py ├── eval.py ├── experiments/ │ ├── parameters/ │ │ ├── kdd99_30_0.npy │ │ ├── kdd99_30_1.npy │ │ ├── kdd99_30_10.npy │ │ ├── kdd99_30_11.npy │ │ ├── kdd99_30_12.npy │ │ ├── kdd99_30_13.npy │ │ ├── kdd99_30_14.npy │ │ ├── kdd99_30_15.npy │ │ ├── kdd99_30_16.npy │ │ ├── kdd99_30_17.npy │ │ ├── kdd99_30_18.npy │ │ ├── kdd99_30_19.npy │ │ ├── kdd99_30_2.npy │ │ ├── kdd99_30_20.npy │ │ ├── kdd99_30_21.npy │ │ ├── kdd99_30_22.npy │ │ ├── kdd99_30_3.npy │ │ ├── kdd99_30_4.npy │ │ ├── kdd99_30_5.npy │ │ ├── kdd99_30_6.npy │ │ ├── kdd99_30_7.npy │ │ ├── kdd99_30_8.npy │ │ └── kdd99_30_9.npy │ ├── plots/ │ │ └── gs/ │ │ └── kdd99_gs_real.npy │ └── settings/ │ ├── kdd99.txt │ └── kdd99_test.txt ├── mmd.py ├── mod_core_rnn_cell_impl.py ├── model.py ├── plotting.py ├── tf_ops.py └── utils.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitattributes ================================================ # Auto detect text files and perform LF normalization * text=auto ================================================ FILE: AD.py ================================================ import tensorflow as tf import numpy as np import pdb import json import model from mod_core_rnn_cell_impl import LSTMCell # modified to allow initializing bias in lstm import utils import eval import DR_discriminator import data_utils # from pyod.utils.utility import * from sklearn.utils.validation import * from sklearn.metrics.classification 
import * from sklearn.metrics.ranking import * from time import time begin = time() """ Here, only the discriminator was used to do the anomaly detection """ # --- get settings --- # # parse command line arguments, or use defaults parser = utils.rgan_options_parser() settings = vars(parser.parse_args()) # if a settings file is specified, it overrides command line arguments/defaults if settings['settings_file']: settings = utils.load_settings_from_file(settings) # --- get data, split --- # data_path = './experiments/data/' + settings['data_load_from'] + '.data.npy' print('Loading data from', data_path) settings["eval_single"] = False settings["eval_an"] = False samples, labels, index = data_utils.get_data(settings["data"], settings["seq_length"], settings["seq_step"], settings["num_signals"], settings["sub_id"], settings["eval_single"], settings["eval_an"], data_path) # --- save settings, data --- # # no need print('Ready to run with settings:') for (k, v) in settings.items(): print(v, '\t', k) # add the settings to local environment # WARNING: at this point a lot of variables appear locals().update(settings) json.dump(settings, open('./experiments/settings/' + identifier + '.txt', 'w'), indent=0) class myADclass(): def __init__(self, epoch, settings=settings, samples=samples, labels=labels, index=index): self.epoch = epoch self.settings = settings self.samples = samples self.labels = labels self.index = index def ADfunc(self): num_samples_t = self.samples.shape[0] print('sample_shape:', self.samples.shape[0]) print('num_samples_t', num_samples_t) # -- only discriminate one batch for one time -- # D_test = np.empty([num_samples_t, self.settings['seq_length'], 1]) DL_test = np.empty([num_samples_t, self.settings['seq_length'], 1]) L_mb = np.empty([num_samples_t, self.settings['seq_length'], 1]) I_mb = np.empty([num_samples_t, self.settings['seq_length'], 1]) batch_times = num_samples_t // self.settings['batch_size'] for batch_idx in range(0, num_samples_t // 
self.settings['batch_size']): # print('batch_idx:{} # display batch progress model.display_batch_progression(batch_idx, batch_times) start_pos = batch_idx * self.settings['batch_size'] end_pos = start_pos + self.settings['batch_size'] T_mb = self.samples[start_pos:end_pos, :, :] L_mmb = self.labels[start_pos:end_pos, :, :] I_mmb = self.index[start_pos:end_pos, :, :] para_path = './experiments/parameters/' + self.settings['sub_id'] + '_' + str( self.settings['seq_length']) + '_' + str(self.epoch) + '.npy' D_t, L_t = DR_discriminator.dis_trained_model(self.settings, T_mb, para_path) D_test[start_pos:end_pos, :, :] = D_t DL_test[start_pos:end_pos, :, :] = L_t L_mb[start_pos:end_pos, :, :] = L_mmb I_mb[start_pos:end_pos, :, :] = I_mmb start_pos = (num_samples_t // self.settings['batch_size']) * self.settings['batch_size'] end_pos = start_pos + self.settings['batch_size'] size = samples[start_pos:end_pos, :, :].shape[0] fill = np.ones([self.settings['batch_size'] - size, samples.shape[1], samples.shape[2]]) batch = np.concatenate([samples[start_pos:end_pos, :, :], fill], axis=0) para_path = './experiments/parameters/' + self.settings['sub_id'] + '_' + str( self.settings['seq_length']) + '_' + str(self.epoch) + '.npy' D_t, L_t = DR_discriminator.dis_trained_model(self.settings, batch, para_path) L_mmb = self.labels[start_pos:end_pos, :, :] I_mmb = self.index[start_pos:end_pos, :, :] D_test[start_pos:end_pos, :, :] = D_t[:size, :, :] DL_test[start_pos:end_pos, :, :] = L_t[:size, :, :] L_mb[start_pos:end_pos, :, :] = L_mmb I_mb[start_pos:end_pos, :, :] = I_mmb results = np.zeros([18, 4]) for i in range(2, 8): tao = 0.1 * i Accu2, Pre2, Rec2, F12 = DR_discriminator.detection_Comb( DL_test, L_mb, I_mb, self.settings['seq_step'], tao) print('seq_length:', self.settings['seq_length']) print('Comb-logits-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}' .format(self.epoch, tao, Accu2, Pre2, Rec2, F12)) results[i - 2, :] = [Accu2, Pre2, Rec2, F12] 
Accu3, Pre3, Rec3, F13 = DR_discriminator.detection_Comb( D_test, L_mb, I_mb, self.settings['seq_step'], tao) print('seq_length:', self.settings['seq_length']) print('Comb-statistic-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}' .format(self.epoch, tao, Accu3, Pre3, Rec3, F13)) results[i - 2+6, :] = [Accu3, Pre3, Rec3, F13] Accu5, Pre5, Rec5, F15 = DR_discriminator.sample_detection(D_test, L_mb, tao) print('seq_length:', self.settings['seq_length']) print('sample-wise-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}' .format(self.epoch, tao, Accu5, Pre5, Rec5, F15)) results[i - 2+12, :] = [Accu5, Pre5, Rec5, F15] return results if __name__ == "__main__": print('Main Starting...') Results = np.empty([settings['num_epochs'], 18, 4]) for epoch in range(settings['num_epochs']): # for epoch in range(50, 60): ob = myADclass(epoch) Results[epoch, :, :] = ob.ADfunc() # res_path = './experiments/plots/Results' + '_' + settings['sub_id'] + '_' + str( # settings['seq_length']) + '.npy' # np.save(res_path, Results) print('Main Terminating...') end = time() - begin print('Testing terminated | Training time=%d s' % (end)) ================================================ FILE: AD_Invert.py ================================================ import tensorflow as tf import numpy as np import pdb import json from mod_core_rnn_cell_impl import LSTMCell # modified to allow initializing bias in lstm import data_utils import plotting import model import mmd import utils import eval import DR_discriminator from differential_privacy.dp_sgd.dp_optimizer import dp_optimizer from differential_privacy.dp_sgd.dp_optimizer import sanitizer from differential_privacy.privacy_accountant.tf import accountant """ Here, both the discriminator and generator were used to do the anomaly detection """ # --- get settings --- # # parse command line arguments, or use defaults parser = utils.rgan_options_parser() settings = vars(parser.parse_args()) # if a 
settings file is specified, it overrides command line arguments/defaults if settings['settings_file']: settings = utils.load_settings_from_file(settings) # --- get data, split --- # data_path = './experiments/data/' + settings['data_load_from'] + '.data.npy' print('Loading data from', data_path) samples, labels, index = data_utils.get_data(settings["data"], settings["seq_length"], settings["seq_step"], settings["num_signals"], settings["sub_id"], settings["eval_single"], settings["eval_an"], data_path) # --- save settings, data --- # # no need print('Ready to run with settings:') for (k, v) in settings.items(): print(v, '\t', k) # add the settings to local environment # WARNING: at this point a lot of variables appear locals().update(settings) json.dump(settings, open('./experiments/settings/' + identifier + '.txt', 'w'), indent=0) class myADclass(): def __init__(self, epoch, settings=settings, samples=samples, labels=labels, index=index): self.epoch = epoch self.settings = settings self.samples = samples self.labels = labels self.index = index def ADfunc(self): num_samples_t = self.samples.shape[0] t_size = 500 T_index = np.random.choice(num_samples_t, size=t_size, replace=False) print('sample_shape:', self.samples.shape[0]) print('num_samples_t', num_samples_t) # -- only discriminate one batch for one time -- # D_test = np.empty([t_size, self.settings['seq_length'], 1]) DL_test = np.empty([t_size, self.settings['seq_length'], 1]) GG = np.empty([t_size, self.settings['seq_length'], self.settings['num_signals']]) T_samples = np.empty([t_size, self.settings['seq_length'], self.settings['num_signals']]) L_mb = np.empty([t_size, self.settings['seq_length'], 1]) I_mb = np.empty([t_size, self.settings['seq_length'], 1]) for batch_idx in range(0, t_size): # print('epoch:{}'.format(self.epoch)) # print('batch_idx:{}'.format(batch_idx)) # display batch progress model.display_batch_progression(batch_idx, t_size) T_mb = self.samples[T_index[batch_idx], :, :] L_mmb = 
self.labels[T_index[batch_idx], :, :] I_mmb = self.index[T_index[batch_idx], :, :] para_path = './experiments/parameters/' + self.settings['sub_id'] + '_' + str( self.settings['seq_length']) + '_' + str(self.epoch) + '.npy' D_t, L_t = DR_discriminator.dis_D_model(self.settings, T_mb, para_path) Gs, Zs, error_per_sample, heuristic_sigma = DR_discriminator.invert(self.settings, T_mb, para_path, g_tolerance=None, e_tolerance=0.1, n_iter=None, max_iter=1000, heuristic_sigma=None) GG[batch_idx, :, :] = Gs T_samples[batch_idx, :, :] = T_mb D_test[batch_idx, :, :] = D_t DL_test[batch_idx, :, :] = L_t L_mb[batch_idx, :, :] = L_mmb I_mb[batch_idx, :, :] = I_mmb # -- use self-defined evaluation functions -- # # -- test different tao values for the detection function -- # results = np.empty([5, 5]) # for i in range(2, 8): # tao = 0.1 * i tao = 0.5 lam = 0.8 Accu1, Pre1, Rec1, F11, FPR1, D_L1 = DR_discriminator.detection_D_I(DL_test, L_mb, I_mb, self.settings['seq_step'], tao) print('seq_length:', self.settings['seq_length']) print('D:Comb-logits-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}; FPR: {:.4}' .format(self.epoch, tao, Accu1, Pre1, Rec1, F11, FPR1)) results[0, :] = [Accu1, Pre1, Rec1, F11, FPR1] Accu2, Pre2, Rec2, F12, FPR2, D_L2 = DR_discriminator.detection_D_I(D_test, L_mb, I_mb, self.settings['seq_step'], tao) print('seq_length:', self.settings['seq_length']) print('D:Comb-statistic-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}; FPR: {:.4}' .format(self.epoch, tao, Accu2, Pre2, Rec2, F12, FPR2)) results[1, :] = [Accu2, Pre2, Rec2, F12, FPR2] Accu3, Pre3, Rec3, F13, FPR3, D_L3 = DR_discriminator.detection_R_D_I(DL_test, GG, T_samples, L_mb, self.settings['seq_step'], tao, lam) print('seq_length:', self.settings['seq_length']) print('RD:Comb-logits_based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}; FPR: {:.4}' .format(self.epoch, tao, Accu3, Pre3, Rec3, F13, FPR3)) results[2, :] = 
[Accu3, Pre3, Rec3, F13, FPR3] Accu4, Pre4, Rec4, F14, FPR4, D_L4 = DR_discriminator.detection_R_D_I(D_test, GG, T_samples, L_mb, self.settings['seq_step'], tao, lam) print('seq_length:', self.settings['seq_length']) print('RD:Comb-statistic-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}; FPR: {:.4}' .format(self.epoch, tao, Accu4, Pre4, Rec4, F14, FPR4)) results[3, :] = [Accu4, Pre4, Rec4, F14, FPR4] Accu5, Pre5, Rec5, F15, FPR5, D_L5 = DR_discriminator.detection_R_I(GG, T_samples, L_mb, self.settings['seq_step'],tao) print('seq_length:', self.settings['seq_length']) print('G:Comb-sample-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}; FPR: {:.4}' .format(self.epoch, tao, Accu5, Pre5, Rec5, F15, FPR5)) results[4, :] = [Accu5, Pre5, Rec5, F15, FPR5] return results, GG, D_test, DL_test if __name__ == "__main__": print('Main Starting...') Results = np.empty([settings['num_epochs'], 5, 5]) t_size = 500 D_test = np.empty([settings['num_epochs'], t_size, settings['seq_length'], 1]) DL_test = np.empty([settings['num_epochs'], t_size, settings['seq_length'], 1]) GG = np.empty([settings['num_epochs'], t_size, settings['seq_length'], settings['num_signals']]) for epoch in range(settings['num_epochs']): # for epoch in range(1): ob = myADclass(epoch) Results[epoch, :, :], GG[epoch, :, :, :], D_test[epoch, :, :, :], DL_test[epoch, :, :, :] = ob.ADfunc() res_path = './experiments/plots/Results_Invert' + '_' + settings['sub_id'] + '_' + str( settings['seq_length']) + '.npy' np.save(res_path, Results) dg_path = './experiments/plots/DG_Invert' + '_' + settings['sub_id'] + '_' + str( settings['seq_length']) + '_' np.save(dg_path + 'D_test.npy', D_test) np.save(dg_path + 'DL_test.npy', DL_test) np.save(dg_path + 'GG.npy', DL_test) print('Main Terminating...') ================================================ FILE: DR_discriminator.py ================================================ import numpy as np import tensorflow as tf 
import matplotlib.pyplot as plt from matplotlib.colors import hsv_to_rgb import model import mmd from mod_core_rnn_cell_impl import LSTMCell from sklearn.metrics import precision_recall_fscore_support def anomaly_detection_plot(D_test, T_mb, L_mb, D_L, epoch, identifier): aa = D_test.shape[0] bb = D_test.shape[1] D_L = D_L.reshape([aa, bb, -1]) x_points = np.arange(bb) fig, ax = plt.subplots(4, 4, sharex=True) for m in range(4): for n in range(4): D = D_test[n * 4 + m, :, :] T = T_mb[n * 4 + m, :, :] L = L_mb[n * 4 + m, :, :] DL = D_L[n * 4 + m, :, :] ax[m, n].plot(x_points, D, '--g', label='Pro') ax[m, n].plot(x_points, T, 'b', label='Data') ax[m, n].plot(x_points, L, 'k', label='Label') ax[m, n].plot(x_points, DL, 'r', label='Label') ax[m, n].set_ylim(-1, 1) for n in range(4): ax[-1, n].xaxis.set_ticks(range(0, bb, int(bb/6))) fig.suptitle(epoch) fig.subplots_adjust(hspace=0.15) fig.savefig("./experiments/plots/DR_dis/" + identifier + "_epoch" + str(epoch).zfill(4) + ".png") plt.clf() plt.close() return True def detection_Comb(Label_test, L_mb, I_mb, seq_step, tao): aa = Label_test.shape[0] bb = Label_test.shape[1] LL = (aa-1)*seq_step+bb Label_test = abs(Label_test.reshape([aa, bb])) L_mb = L_mb .reshape([aa, bb]) I_mb = I_mb .reshape([aa, bb]) D_L = np.zeros([LL, 1]) L_L = np.zeros([LL, 1]) Count = np.zeros([LL, 1]) for i in range(0, aa): for j in range(0, bb): # print('index:', i*10+j) D_L[i*seq_step+j] += Label_test[i, j] L_L[i * seq_step + j] += L_mb[i, j] Count[i * seq_step + j] += 1 D_L /= Count L_L /= Count TP, TN, FP, FN = 0, 0, 0, 0 for i in range(LL): if D_L[i] > tao: # true/negative D_L[i] = 0 else: # false/positive D_L[i] = 1 cc = (D_L == L_L) # print('D_L:', D_L) # print('L_L:', L_L) cc = list(cc.reshape([-1])) N = cc.count(True) print('N:', N) Accu = float((N / LL) * 100) precision, recall, f1, _ = precision_recall_fscore_support(L_L, D_L, average='binary') return Accu, precision, recall, f1, def detection_logits_I(DL_test, L_mb, I_mb, seq_step, 
tao): aa = DL_test.shape[0] bb = DL_test.shape[1] LL = (aa-1)*seq_step+bb DL_test = abs(DL_test.reshape([aa, bb])) L_mb = L_mb .reshape([aa, bb]) I_mb = I_mb .reshape([aa, bb]) D_L = np.zeros([LL, 1]) L_L = np.zeros([LL, 1]) Count = np.zeros([LL, 1]) for i in range(0, aa): for j in range(0, bb): # print('index:', i*10+j) D_L[i*seq_step+j] += DL_test[i, j] L_L[i * seq_step + j] += L_mb[i, j] Count[i * seq_step + j] += 1 D_L /= Count L_L /= Count TP, TN, FP, FN = 0, 0, 0, 0 for i in range(LL): if D_L[i] > tao: # true/negative D_L[i] = 0 else: # false/positive D_L[i] = 1 A = D_L[i] B = L_L[i] if A == 1 and B == 1: TP += 1 elif A == 1 and B == 0: FP += 1 elif A == 0 and B == 0: TN += 1 elif A == 0 and B == 1: FN += 1 cc = (D_L == L_L) # print('D_L:', D_L) # print('L_L:', L_L) cc = list(cc.reshape([-1])) N = cc.count(True) print('N:', N) Accu = float((N / LL) * 100) precision, recall, f1, _ = precision_recall_fscore_support(L_L, D_L, average='binary') # true positive among all the detected positive # Pre = (100 * TP) / (TP + FP + 1) # # true positive among all the real positive # Rec = (100 * TP) / (TP + FN + 1) # # The F1 score is the harmonic average of the precision and recall, # # where an F1 score reaches its best value at 1 (perfect precision and recall) and worst at 0. 
# F1 = (2 * Pre * Rec) / (100 * (Pre + Rec + 1)) # False positive rate--false alarm rate FPR = (100 * FP) / (FP + TN+1) return Accu, precision, recall, f1, FPR, D_L def detection_statistic_I(D_test, L_mb, I_mb, seq_step, tao): # point-wise detection for one dimension aa = D_test.shape[0] bb = D_test.shape[1] LL = (aa-1) * seq_step + bb # print('aa:', aa) # print('bb:', bb) # print('LL:', LL) D_test = D_test.reshape([aa, bb]) L_mb = L_mb.reshape([aa, bb]) I_mb = I_mb.reshape([aa, bb]) D_L = np.zeros([LL, 1]) L_L = np.zeros([LL, 1]) Count = np.zeros([LL, 1]) for i in range(0, aa): for j in range(0, bb): # print('index:', i * 10 + j) D_L[i * seq_step + j] += D_test[i, j] L_L[i * seq_step + j] += L_mb[i, j] Count[i * seq_step + j] += 1 D_L /= Count L_L /= Count TP, TN, FP, FN = 0, 0, 0, 0 for i in range(LL): if D_L[i] > tao: # true/negative D_L[i] = 0 else: # false/positive D_L[i] = 1 A = D_L[i] B = L_L[i] if A == 1 and B == 1: TP += 1 elif A == 1 and B == 0: FP += 1 elif A == 0 and B == 0: TN += 1 elif A == 0 and B == 1: FN += 1 cc = (D_L == L_L) cc = list(cc.reshape([-1])) N = cc.count(True) Accu = float((N / LL) * 100) precision, recall, f1, _ = precision_recall_fscore_support(L_L, D_L, average='binary') # true positive among all the detected positive # Pre = (100 * TP) / (TP + FP + 1) # # true positive among all the real positive # Rec = (100 * TP) / (TP + FN + 1) # # The F1 score is the harmonic average of the precision and recall, # # where an F1 score reaches its best value at 1 (perfect precision and recall) and worst at 0. 
# F1 = (2 * Pre * Rec) / (100 * (Pre + Rec + 1)) # False positive rate--false alarm rate FPR = (100 * FP) / (FP + TN) return Accu, precision, recall, f1, FPR, D_L def detection_D_I(DD, L_mb, I_mb, seq_step, tao): # point-wise detection for one dimension aa = DD.shape[0] bb = DD.shape[1] LL = (aa-1)*seq_step+bb DD = abs(DD.reshape([aa, bb])) L_mb = L_mb .reshape([aa, bb]) I_mb = I_mb .reshape([aa, bb]) D_L = np.zeros([LL, 1]) L_L = np.zeros([LL, 1]) Count = np.zeros([LL, 1]) for i in range(0, aa): for j in range(0, bb): # print('index:', i*10+j) D_L[i*10+j] += DD[i, j] L_L[i * 10 + j] += L_mb[i, j] Count[i * 10 + j] += 1 D_L /= Count L_L /= Count TP, TN, FP, FN = 0, 0, 0, 0 for i in range(LL): if D_L[i] > tao: # true/negative D_L[i] = 0 else: # false/positive D_L[i] = 1 A = D_L[i] B = L_L[i] if A == 1 and B == 1: TP += 1 elif A == 1 and B == 0: FP += 1 elif A == 0 and B == 0: TN += 1 elif A == 0 and B == 1: FN += 1 cc = (D_L == L_L) # print('D_L:', D_L) # print('L_L:', L_L) cc = list(cc.reshape([-1])) N = cc.count(True) print('N:', N) Accu = float((N / LL) * 100) # true positive among all the detected positive Pre = (100 * TP) / (TP + FP + 1) # true positive among all the real positive Rec = (100 * TP) / (TP + FN + 1) # The F1 score is the harmonic average of the precision and recall, # where an F1 score reaches its best value at 1 (perfect precision and recall) and worst at 0. F1 = (2 * Pre * Rec) / (100 * (Pre + Rec + 1)) # False positive rate--false alarm rate FPR = (100 * FP) / (FP + TN+1) return Accu, Pre, Rec, F1, FPR, D_L def detection_R_D_I(DD, Gs, T_mb, L_mb, seq_step, tao, lam): # point-wise detection for one dimension # (1-lambda)*R(x)+lambda*D(x) # lambda=0.5? 
# D_test, Gs, T_mb, L_mb are of same size R = np.absolute(Gs - T_mb) R = np.mean(R, axis=2) aa = DD.shape[0] bb = DD.shape[1] LL = (aa - 1) * seq_step + bb DD = abs(DD.reshape([aa, bb])) DD = 1-DD L_mb = L_mb.reshape([aa, bb]) R = R.reshape([aa, bb]) D_L = np.zeros([LL, 1]) R_L = np.zeros([LL, 1]) L_L = np.zeros([LL, 1]) L_pre = np.zeros([LL, 1]) Count = np.zeros([LL, 1]) for i in range(0, aa): for j in range(0, bb): # print('index:', i*10+j) D_L[i * 10 + j] += DD[i, j] L_L[i * 10 + j] += L_mb[i, j] R_L[i * 10 + j] += R[i, j] Count[i * 10 + j] += 1 D_L /= Count L_L /= Count R_L /= Count TP, TN, FP, FN = 0, 0, 0, 0 for i in range(LL): if (1-lam)*R_L[i] + lam*D_L[i] > tao: # false L_pre[i] = 1 else: # true L_pre[i] = 0 A = L_pre[i] # print('A:', A) B = L_L[i] # print('B:', B) if A == 1 and B == 1: TP += 1 elif A == 1 and B == 0: FP += 1 elif A == 0 and B == 0: TN += 1 elif A == 0 and B == 1: FN += 1 cc = (L_pre == L_L) cc = list(cc.reshape([-1])) N = cc.count(True) Accu = float((N / (aa*bb)) * 100) # true positive among all the detected positive Pre = (100 * TP) / (TP + FP + 1) # true positive among all the real positive Rec = (100 * TP) / (TP + FN + 1) # The F1 score is the harmonic average of the precision and recall, # where an F1 score reaches its best value at 1 (perfect precision and recall) and worst at 0. F1 = (2 * Pre * Rec) / (100 * (Pre + Rec + 1)) # False positive rate FPR = (100 * FP) / (FP + TN+1) return Accu, Pre, Rec, F1, FPR, L_pre def detection_R_I(Gs, T_mb, L_mb, seq_step, tao): # point-wise detection for one dimension # (1-lambda)*R(x)+lambda*D(x) # lambda=0.5? 
# D_test, Gs, T_mb, L_mb are of same size R = np.absolute(Gs - T_mb) R = np.mean(R, axis=2) aa = R.shape[0] bb = R.shape[1] LL = (aa - 1) * seq_step + bb L_mb = L_mb.reshape([aa, bb]) R = R.reshape([aa, bb]) L_L = np.zeros([LL, 1]) R_L = np.zeros([LL, 1]) L_pre = np.zeros([LL, 1]) Count = np.zeros([LL, 1]) for i in range(0, aa): for j in range(0, bb): # print('index:', i*10+j) L_L[i * 10 + j] += L_mb[i, j] R_L[i * 10 + j] += R[i, j] Count[i * 10 + j] += 1 L_L /= Count R_L /= Count TP, TN, FP, FN = 0, 0, 0, 0 for i in range(LL): if R_L[i] > tao: # false L_pre[i] = 1 else: # true L_pre[i] = 0 A = L_pre[i] B = L_L[i] if A == 1 and B == 1: TP += 1 elif A == 1 and B == 0: FP += 1 elif A == 0 and B == 0: TN += 1 elif A == 0 and B == 1: FN += 1 cc = (L_pre == L_L) cc = list(cc.reshape([-1])) N = cc.count(True) Accu = float((N / (aa*bb)) * 100) # true positive among all the detected positive Pre = (100 * TP) / (TP + FP + 1) # true positive among all the real positive Rec = (100 * TP) / (TP + FN + 1) # The F1 score is the harmonic average of the precision and recall, # where an F1 score reaches its best value at 1 (perfect precision and recall) and worst at 0. 
F1 = (2 * Pre * Rec) / (100 * (Pre + Rec + 1)) # False positive rate FPR = (100 * FP) / (FP + TN+1) return Accu, Pre, Rec, F1, FPR, L_pre def sample_detection(D_test, L_mb, tao): # sample-wise detection for one dimension aa = D_test.shape[0] bb = D_test.shape[1] D_test = D_test.reshape([aa, bb]) L_mb = L_mb.reshape([aa, bb]) L = np.sum(L_mb, 1) # NN = 0-10 L[L > 0] = 1 D_L = np.empty([aa, ]) for i in range(aa): if np.mean(D_test[i, :]) > tao: # true/negative D_L[i] = 0 else: # false/positive D_L[i] = 1 cc = (D_L == L) # cc = list(cc) N = list(cc).count(True) Accu = float((N / (aa)) * 100) precision, recall, f1, _ = precision_recall_fscore_support(L, D_L, average='binary') return Accu, precision, recall, f1 def CUSUM_det(spe_n, spe_a, labels): mu = np.mean(spe_n) sigma = np.std(spe_n) kk = 3*sigma H = 15*sigma print('H:', H) tar = np.mean(spe_a) mm = spe_a.shape[0] SH = np.empty([mm, ]) SL = np.empty([mm, ]) for i in range(mm): SH[-1] = 0 SL[-1] = 0 SH[i] = max(0, SH[i-1]+spe_a[i]-(tar+kk)) SL[i] = min(0, SL[i-1]+spe_a[i]-(tar-kk)) count = np.empty([mm, ]) TP = 0 TN = 0 FP = 0 FN = 0 for i in range(mm): A = SH[i] B = SL[i] AA = H BB = -H if A <= AA and B >= BB: count[i] = 0 else: count[i] = 1 C = count[i] D = labels[i] if C == 1 and D == 1: TP += 1 elif C == 1 and D == 0: FP += 1 elif C == 0 and D == 0: TN += 1 elif C == 0 and D == 1: FN += 1 cc = (count == labels) # cc = list(cc) N = list(cc).count(True) Accu = float((N / (mm)) * 100) # true positive among all the detected positive Pre = (100 * TP) / (TP + FP + 1) # true positive among all the real positive Rec = (100 * TP) / (TP + FN) # The F1 score is the harmonic average of the precision and recall, # where an F1 score reaches its best value at 1 (perfect precision and recall) and worst at 0. 
F1 = (2 * Pre * Rec) / (100 * (Pre + Rec + 1)) # False positive rate FPR = (100 * FP) / (FP + TN) return Accu, Pre, Rec, F1, FPR def SPE(X, pc): a = X.shape[0] b = X.shape[1] spe = np.empty([a]) # Square Prediction Error (square of residual distance) # spe = X'(I-PP')X I = np.identity(b, float) - np.matmul(pc.transpose(1, 0), pc) # I = np.matmul(I, I) for i in range(a): x = X[i, :].reshape([b, 1]) y = np.matmul(x.transpose(1, 0), I) spe[i] = np.matmul(y, x) return spe def generator_o(z, hidden_units_g, seq_length, batch_size, num_generated_features, reuse=False, parameters=None, learn_scale=True): """ If parameters are supplied, initialise as such """ # It is important to specify different variable scopes for the LSTM cells. with tf.variable_scope("generator_o") as scope: W_out_G_initializer = tf.constant_initializer(value=parameters['generator/W_out_G:0']) b_out_G_initializer = tf.constant_initializer(value=parameters['generator/b_out_G:0']) try: scale_out_G_initializer = tf.constant_initializer(value=parameters['generator/scale_out_G:0']) except KeyError: scale_out_G_initializer = tf.constant_initializer(value=1) assert learn_scale lstm_initializer = tf.constant_initializer(value=parameters['generator/rnn/lstm_cell/weights:0']) bias_start = parameters['generator/rnn/lstm_cell/biases:0'] W_out_G = tf.get_variable(name='W_out_G', shape=[hidden_units_g, num_generated_features], initializer=W_out_G_initializer) b_out_G = tf.get_variable(name='b_out_G', shape=num_generated_features, initializer=b_out_G_initializer) scale_out_G = tf.get_variable(name='scale_out_G', shape=1, initializer=scale_out_G_initializer, trainable=False) inputs = z cell = LSTMCell(num_units=hidden_units_g, state_is_tuple=True, initializer=lstm_initializer, bias_start=bias_start, reuse=reuse) rnn_outputs, rnn_states = tf.nn.dynamic_rnn( cell=cell, dtype=tf.float32, sequence_length=[seq_length] * batch_size, inputs=inputs) rnn_outputs_2d = tf.reshape(rnn_outputs, [-1, hidden_units_g]) logits_2d = 
tf.matmul(rnn_outputs_2d, W_out_G) + b_out_G #out put weighted sum output_2d = tf.nn.tanh(logits_2d) # logits operation [-1, 1] output_3d = tf.reshape(output_2d, [-1, seq_length, num_generated_features]) return output_3d def discriminator_o(x, hidden_units_d, reuse=False, parameters=None): with tf.variable_scope("discriminator_0") as scope: W_out_D_initializer = tf.constant_initializer(value=parameters['discriminator/W_out_D:0']) b_out_D_initializer = tf.constant_initializer(value=parameters['discriminator/b_out_D:0']) W_out_D = tf.get_variable(name='W_out_D', shape=[hidden_units_d, 1], initializer=W_out_D_initializer) b_out_D = tf.get_variable(name='b_out_D', shape=1, initializer=b_out_D_initializer) inputs = x cell = tf.contrib.rnn.LSTMCell(num_units=hidden_units_d, state_is_tuple=True, reuse=reuse) rnn_outputs, rnn_states = tf.nn.dynamic_rnn(cell=cell, dtype=tf.float32, inputs=inputs) logits = tf.einsum('ijk,km', rnn_outputs, W_out_D) + b_out_D # output weighted sum output = tf.nn.sigmoid(logits) # y = 1 / (1 + exp(-x)). output activation [0, 1]. Probability?? # sigmoid output ([0,1]), Probability? 
return output, logits def invert(settings, samples, para_path, g_tolerance=None, e_tolerance=0.1, n_iter=None, max_iter=10000, heuristic_sigma=None): """ Return the latent space points corresponding to a set of a samples ( from gradient descent ) Note: this function is designed for ONE sample generation """ # num_samples = samples.shape[0] # cast samples to float32 samples = np.float32(samples) # get the model # if settings is a string, assume it's an identifier and load if type(settings) == str: settings = json.load(open('./experiments/settings/' + settings + '.txt', 'r')) # print('Inverting', 1, 'samples using model', settings['identifier'], 'at epoch', epoch,) # if not g_tolerance is None: # print('until gradient norm is below', g_tolerance) # else: # print('until error is below', e_tolerance) # get parameters parameters = model.load_parameters(para_path) # # assertions # assert samples.shape[2] == settings['num_generated_features'] # create VARIABLE Z Z = tf.get_variable(name='Z', shape=[1, settings['seq_length'], settings['latent_dim']], initializer=tf.random_normal_initializer()) # create outputs G_samples = generator_o(Z, settings['hidden_units_g'], settings['seq_length'], 1, settings['num_generated_features'], reuse=False, parameters=parameters) # generator_vars = ['hidden_units_g', 'seq_length', 'batch_size', 'num_generated_features', 'cond_dim', 'learn_scale'] # generator_settings = dict((k, settings[k]) for k in generator_vars) # G_samples = model.generator(Z, **generator_settings, reuse=True) fd = None # define loss mmd-based loss if heuristic_sigma is None: heuristic_sigma = mmd.median_pairwise_distance_o(samples) # this is noisy print('heuristic_sigma:', heuristic_sigma) samples = tf.reshape(samples, [1, settings['seq_length'], settings['num_generated_features']]) Kxx, Kxy, Kyy, wts = mmd._mix_rbf_kernel(G_samples, samples, sigmas=tf.constant(value=heuristic_sigma, shape=(1, 1))) similarity_per_sample = tf.diag_part(Kxy) 
reconstruction_error_per_sample = 1 - similarity_per_sample # reconstruction_error_per_sample = tf.reduce_sum((tf.nn.l2_normalize(G_samples, dim=1) - tf.nn.l2_normalize(samples, dim=1))**2, axis=[1,2]) similarity = tf.reduce_mean(similarity_per_sample) reconstruction_error = 1 - similarity # updater # solver = tf.train.AdamOptimizer().minimize(reconstruction_error_per_sample, var_list=[Z]) # solver = tf.train.RMSPropOptimizer(learning_rate=500).minimize(reconstruction_error, var_list=[Z]) solver = tf.train.RMSPropOptimizer(learning_rate=0.1).minimize(reconstruction_error_per_sample, var_list=[Z]) # solver = tf.train.MomentumOptimizer(learning_rate=0.1, momentum=0.9).minimize(reconstruction_error_per_sample, var_list=[Z]) grad_Z = tf.gradients(reconstruction_error_per_sample, Z)[0] grad_per_Z = tf.norm(grad_Z, axis=(1, 2)) grad_norm = tf.reduce_mean(grad_per_Z) # solver = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(reconstruction_error, var_list=[Z]) print('Finding latent state corresponding to samples...') sess = tf.Session() sess.run(tf.global_variables_initializer()) with tf.Session() as sess: sess.run(tf.global_variables_initializer()) error = sess.run(reconstruction_error, feed_dict=fd) g_n = sess.run(grad_norm, feed_dict=fd) # print(g_n) i = 0 if not n_iter is None: while i < n_iter: _ = sess.run(solver, feed_dict=fd) error = sess.run(reconstruction_error, feed_dict=fd) i += 1 else: if not g_tolerance is None: while g_n > g_tolerance: _ = sess.run(solver, feed_dict=fd) error, g_n = sess.run([reconstruction_error, grad_norm], feed_dict=fd) i += 1 print(error, g_n) if i > max_iter: break else: while np.abs(error) > e_tolerance: _ = sess.run(solver, feed_dict=fd) error = sess.run(reconstruction_error, feed_dict=fd) i += 1 # print(error) if i > max_iter: break Zs = sess.run(Z, feed_dict=fd) Gs = sess.run(G_samples, feed_dict={Z: Zs}) error_per_sample = sess.run(reconstruction_error_per_sample, feed_dict=fd) print('Z found in', i, 'iterations with 
final reconstruction error of', error) tf.reset_default_graph() return Gs, Zs, error_per_sample, heuristic_sigma def dis_trained_model(settings, samples, para_path): """ Return the discrimination results of num_samples testing samples from a trained model described by settings dict Note: this function is designed for ONE sample discrimination """ # if settings is a string, assume it's an identifier and load if type(settings) == str: settings = json.load(open('./experiments/settings/' + settings + '.txt', 'r')) num_samples = samples.shape[0] samples = np.float32(samples) num_variables = samples.shape[2] # samples = np.reshape(samples, [1, settings['seq_length'], settings['num_generated_features']]) # get the parameters, get other variables # parameters = model.load_parameters(settings['sub_id'] + '_' + str(settings['seq_length']) + '_' + str(epoch)) parameters = model.load_parameters(para_path) # settings['sub_id'] + '_' + str(settings['seq_length']) + '_' + str(epoch) # create placeholder, T samples # T = tf.placeholder(tf.float32, [settings['batch_size'], settings['seq_length'], settings['num_generated_features']]) T = tf.placeholder(tf.float32, [num_samples, settings['seq_length'], num_variables]) # create the discriminator (GAN) # normal GAN D_t, L_t = discriminator_o(T, settings['hidden_units_d'], reuse=False, parameters=parameters) config = tf.ConfigProto() config.gpu_options.allow_growth = True # with tf.device('/gpu:1'): with tf.Session(config=config) as sess: sess.run(tf.global_variables_initializer()) D_t, L_t = sess.run([D_t, L_t], feed_dict={T: samples}) tf.reset_default_graph() return D_t, L_t def dis_D_model(settings, samples, para_path): """ Return the discrimination results of num_samples testing samples from a trained model described by settings dict Note: this function is designed for ONE sample discrimination """ # if settings is a string, assume it's an identifier and load if type(settings) == str: settings = 
json.load(open('./experiments/settings/' + settings + '.txt', 'r')) # num_samples = samples.shape[0] samples = np.float32(samples) samples = np.reshape(samples, [1, settings['seq_length'], settings['num_generated_features']]) # get the parameters, get other variables parameters = model.load_parameters(para_path) # create placeholder, T samples T = tf.placeholder(tf.float32, [1, settings['seq_length'], settings['num_generated_features']]) # create the discriminator (GAN or CGAN) # normal GAN D_t, L_t = discriminator_o(T, settings['hidden_units_d'], reuse=False, parameters=parameters) # D_t, L_t = model.discriminator(T, settings['hidden_units_d'], settings['seq_length'], num_samples, reuse=False, # parameters=parameters, cond_dim=0, c=None, batch_mean=False) config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session() as sess: sess.run(tf.global_variables_initializer()) D_t, L_t = sess.run([D_t, L_t], feed_dict={T: samples}) tf.reset_default_graph() return D_t, L_t ================================================ FILE: README.md ================================================ # -- Multivariate Anomaly Detection for Time Series Data with GANs -- # # MAD-GAN This repository contains code for the paper, _[MAD-GAN: Multivariate Anomaly Detection for Time Series Data with Generative Adversarial Networks](https://arxiv.org/pdf/1901.04997.pdf)_, by Dan Li, Dacheng Chen, Jonathan Goh, and See-Kiong Ng. MAD-GAN is a refined version of GAN-AD at _[Anomaly Detection with Generative Adversarial Networks for Multivariate Time Series](https://arxiv.org/pdf/1809.04758.pdf)_ The code can be found at https://github.com/LiDan456/GAN-AD (We are still working on this topic, will upload the completed version later...) ## Overview We used generative adversarial networks (GANs) to do anomaly detection for time series data. 
The GAN framework was **R**GAN, which was taken from the paper, _[Real-valued (Medical) Time Series Generation with Recurrent Conditional GANs](https://arxiv.org/abs/1706.02633)_. Please refer to https://github.com/ratschlab/RGAN for the original code. ## Quickstart - Python3 - Please unpack the data.7z file in the data folder before running RGAN.py and AD.py - To train the model: """ python RGAN.py --settings_file kdd99 """ - To do anomaly detection: """ python AD.py --settings_file kdd99_test""" """ python AD_Invert.py --settings_file kdd99_test""" ## Data We apply our method on the SWaT and WADI datasets in the paper; however, we did not upload the data to this repository. Please refer to https://itrust.sutd.edu.sg/ and send a request to iTrust if you want to try the data. In this repository we used the KDD Cup 1999 dataset as an example (please unpack the data.7z file in the data folder before running RGAN.py and AD.py). You can also download the original data at http://kdd.ics.uci.edu/databases/kddcup99/kddcup99.html ================================================ FILE: RGAN.py ================================================ import numpy as np import tensorflow as tf import pdb import random import json from scipy.stats import mode import data_utils import plotting import model import utils import eval import DR_discriminator from time import time from math import floor from mmd import rbf_mmd2, median_pairwise_distance, mix_rbf_mmd2_and_ratio begin = time() tf.logging.set_verbosity(tf.logging.ERROR) # --- get settings --- # # parse command line arguments, or use defaults parser = utils.rgan_options_parser() settings = vars(parser.parse_args()) # if a settings file is specified, it overrides command line arguments/defaults if settings['settings_file']: settings = utils.load_settings_from_file(settings) # --- get data, split --- # # samples, pdf, labels = data_utils.get_data(settings) data_path = './experiments/data/' + settings['data_load_from'] + '.data.npy' 
print('Loading data from', data_path) settings["eval_an"] = False settings["eval_single"] = False samples, labels, index = data_utils.get_data(settings["data"], settings["seq_length"], settings["seq_step"], settings["num_signals"], settings['sub_id'], settings["eval_single"], settings["eval_an"], data_path) print('samples_size:',samples.shape) # -- number of variables -- # num_variables = samples.shape[2] print('num_variables:', num_variables) # --- save settings, data --- # print('Ready to run with settings:') for (k, v) in settings.items(): print(v, '\t', k) # add the settings to local environment # WARNING: at this point a lot of variables appear locals().update(settings) json.dump(settings, open('./experiments/settings/' + identifier + '.txt', 'w'), indent=0) # --- build model --- # # preparation: data placeholders and model parameters Z, X, T = model.create_placeholders(batch_size, seq_length, latent_dim, num_variables) discriminator_vars = ['hidden_units_d', 'seq_length', 'batch_size', 'batch_mean'] discriminator_settings = dict((k, settings[k]) for k in discriminator_vars) generator_vars = ['hidden_units_g', 'seq_length', 'batch_size', 'learn_scale'] generator_settings = dict((k, settings[k]) for k in generator_vars) generator_settings['num_signals'] = num_variables # model: GAN losses D_loss, G_loss = model.GAN_loss(Z, X, generator_settings, discriminator_settings) D_solver, G_solver, priv_accountant = model.GAN_solvers(D_loss, G_loss, learning_rate, batch_size, total_examples=samples.shape[0], l2norm_bound=l2norm_bound, batches_per_lot=batches_per_lot, sigma=dp_sigma, dp=dp) # model: generate samples for visualization G_sample = model.generator(Z, **generator_settings, reuse=True) # # --- evaluation settings--- # # # # frequency to do visualisations # num_samples = samples.shape[0] # vis_freq = max(6600 // num_samples, 1) # eval_freq = max(6600// num_samples, 1) # # # get heuristic bandwidth for mmd kernel from evaluation samples # heuristic_sigma_training 
= median_pairwise_distance(samples) # best_mmd2_so_far = 1000 # # # optimise sigma using that (that's t-hat) # batch_multiplier = 5000 // batch_size # eval_size = batch_multiplier * batch_size # eval_eval_size = int(0.2 * eval_size) # eval_real_PH = tf.placeholder(tf.float32, [eval_eval_size, seq_length, num_generated_features]) # eval_sample_PH = tf.placeholder(tf.float32, [eval_eval_size, seq_length, num_generated_features]) # n_sigmas = 2 # sigma = tf.get_variable(name='sigma', shape=n_sigmas, initializer=tf.constant_initializer( # value=np.power(heuristic_sigma_training, np.linspace(-1, 3, num=n_sigmas)))) # mmd2, that = mix_rbf_mmd2_and_ratio(eval_real_PH, eval_sample_PH, sigma) # with tf.variable_scope("SIGMA_optimizer"): # sigma_solver = tf.train.RMSPropOptimizer(learning_rate=0.05).minimize(-that, var_list=[sigma]) # # sigma_solver = tf.train.AdamOptimizer().minimize(-that, var_list=[sigma]) # # sigma_solver = tf.train.AdagradOptimizer(learning_rate=0.1).minimize(-that, var_list=[sigma]) # sigma_opt_iter = 2000 # sigma_opt_thresh = 0.001 # sigma_opt_vars = [var for var in tf.global_variables() if 'SIGMA_optimizer' in var.name] # --- run the program --- # config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) # sess = tf.Session() sess.run(tf.global_variables_initializer()) # # -- plot the real samples -- # vis_real_indices = np.random.choice(len(samples), size=16) vis_real = np.float32(samples[vis_real_indices, :, :]) plotting.save_plot_sample(vis_real, 0, identifier + '_real', n_samples=16, num_epochs=num_epochs) plotting.save_samples_real(vis_real, identifier) # --- train --- # train_vars = ['batch_size', 'D_rounds', 'G_rounds', 'use_time', 'seq_length', 'latent_dim'] train_settings = dict((k, settings[k]) for k in train_vars) train_settings['num_signals'] = num_variables t0 = time() MMD = np.zeros([num_epochs, ]) for epoch in range(num_epochs): # for epoch in range(1): # -- train epoch -- # D_loss_curr, 
G_loss_curr = model.train_epoch(epoch, samples, labels, sess, Z, X, D_loss, G_loss, D_solver, G_solver, **train_settings) # # -- eval -- # # # visualise plots of generated samples, with/without labels # # choose which epoch to visualize # # # random input vectors for the latent space, as the inputs of generator # vis_ZZ = model.sample_Z(batch_size, seq_length, latent_dim, use_time) # # # # -- generate samples-- # # vis_sample = sess.run(G_sample, feed_dict={Z: vis_ZZ}) # # # -- visualize the generated samples -- # # plotting.save_plot_sample(vis_sample, epoch, identifier, n_samples=16, num_epochs=None, ncol=4) # # plotting.save_plot_sample(vis_sample, 0, identifier + '_real', n_samples=16, num_epochs=num_epochs) # # # save the generated samples in cased they might be useful for comparison # plotting.save_samples(vis_sample, identifier, epoch) # -- print -- # print('epoch, D_loss_curr, G_loss_curr, seq_length') print('%d\t%.4f\t%.4f\t%d' % (epoch, D_loss_curr, G_loss_curr, seq_length)) # # -- compute mmd2 and if available, prob density -- # # if epoch % eval_freq == 0: # # how many samples to evaluate with? 
# eval_Z = model.sample_Z(eval_size, seq_length, latent_dim, use_time) # eval_sample = np.empty(shape=(eval_size, seq_length, num_signals)) # for i in range(batch_multiplier): # eval_sample[i * batch_size:(i + 1) * batch_size, :, :] = sess.run(G_sample, feed_dict={ Z: eval_Z[i * batch_size:(i + 1) * batch_size]}) # eval_sample = np.float32(eval_sample) # eval_real = np.float32(samples['vali'][np.random.choice(len(samples['vali']), size=batch_multiplier * batch_size), :, :]) # # eval_eval_real = eval_real[:eval_eval_size] # eval_test_real = eval_real[eval_eval_size:] # eval_eval_sample = eval_sample[:eval_eval_size] # eval_test_sample = eval_sample[eval_eval_size:] # # # MMD # # reset ADAM variables # sess.run(tf.initialize_variables(sigma_opt_vars)) # sigma_iter = 0 # that_change = sigma_opt_thresh * 2 # old_that = 0 # while that_change > sigma_opt_thresh and sigma_iter < sigma_opt_iter: # new_sigma, that_np, _ = sess.run([sigma, that, sigma_solver], # feed_dict={eval_real_PH: eval_eval_real, eval_sample_PH: eval_eval_sample}) # that_change = np.abs(that_np - old_that) # old_that = that_np # sigma_iter += 1 # opt_sigma = sess.run(sigma) # try: # mmd2, that_np = sess.run(mix_rbf_mmd2_and_ratio(eval_test_real, eval_test_sample, biased=False, sigmas=sigma)) # except ValueError: # mmd2 = 'NA' # that = 'NA' # # MMD[epoch, ] = mmd2 # -- save model parameters -- # model.dump_parameters(sub_id + '_' + str(seq_length) + '_' + str(epoch), sess) np.save('./experiments/plots/gs/' + identifier + '_' + 'MMD.npy', MMD) end = time() - begin print('Training terminated | Training time=%d s' %(end) ) print("Training terminated | training time = %ds " % (time() - begin)) ================================================ FILE: data_utils.py ================================================ import numpy as np import pandas as pd import pdb import re from time import time import json import random import model from scipy.spatial.distance import pdist, squareform from scipy.stats import 
multivariate_normal, invgamma, mode from scipy.special import gamma from scipy.misc.pilutil import imresize from functools import partial from math import ceil from sklearn.metrics.pairwise import rbf_kernel from sklearn.preprocessing import MinMaxScaler from sklearn import preprocessing # --- deal with the SWaT data --- # def swat(seq_length, seq_step, num_signals, randomize=False): """ Load and serialise """ # train = np.load('./data/swat.npy') # print('Loaded swat from .npy') train = np.loadtxt(open('./data/swat.csv'), delimiter=',') print('Loaded swat from .csv') m, n = train.shape # m=496800, n=52 for i in range(n - 1): A = max(train[:, i]) if A != 0: train[:, i] /= max(train[:, i]) # scale from -1 to 1 train[:, i] = 2 * train[:, i] - 1 else: train[:, i] = train[:, i] samples = train[21600:, 0:n-1] labels = train[21600:, n-1] # the last colummn is label ############################# # -- choose variable for uni-variate GAN-AD -- # # samples = samples[:, [1, 8, 18, 28]] ############################ # -- apply PCA dimension reduction for multi-variate GAN-AD -- # from sklearn.decomposition import PCA # ALL SENSORS IDX # XS = [0, 1, 5, 6, 7, 8, 16, 17, 18, 25, 26, 27, 28, 33, 34, 35, 36, 37, 38, 39, 40, 41, 44, 45, 46, 47] # X_n = samples[:, XS] # X_a = samples_a[:, XS] # All VARIABLES X_n = samples #################################### ################################### # -- the best PC dimension is chosen pc=5 -- # n_components = num_signals pca = PCA(n_components, svd_solver='full') pca.fit(X_n) ex_var = pca.explained_variance_ratio_ pc = pca.components_ # projected values on the principal component T_n = np.matmul(X_n, pc.transpose(1, 0)) samples = T_n # # only for one-dimensional # samples = T_n.reshape([samples.shape[0], ]) ########################################### ########################################### # seq_length = 7200 num_samples = (samples.shape[0]-seq_length)//seq_step print("num_samples:", num_samples) print("num_signals:", num_signals) aa = 
np.empty([num_samples, seq_length, num_signals]) bb = np.empty([num_samples, seq_length, 1]) for j in range(num_samples): bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1,1]) for i in range(num_signals): aa[j, :, i] = samples[(j * seq_step):(j*seq_step + seq_length), i] # samples = aa[:, 0:7200:200, :] # labels = bb[:, 0:7200:200, :] samples = aa labels = bb return samples, labels def swat_birgan(seq_length, seq_step, num_signals, randomize=False): """ Load and serialise """ # train = np.load('./data/swat.npy') # print('Loaded swat from .npy') train = np.loadtxt(open('./data/swat.csv'), delimiter=',') print('Loaded swat from .csv') m, n = train.shape # m=496800, n=52 for i in range(n - 1): A = max(train[:, i]) if A != 0: train[:, i] /= max(train[:, i]) # scale from -1 to 1 train[:, i] = 2 * train[:, i] - 1 else: train[:, i] = train[:, i] samples = train[21600:, 0:n-1] labels = train[21600:, n-1] # the last colummn is label ############################# # # -- choose variable for uni-variate GAN-AD -- # # # samples = samples[:, [1, 8, 18, 28]] ########################################### ########################################### nn = samples.shape[1] num_samples = (samples.shape[0]-seq_length)//seq_step aa = np.empty([num_samples, nn, nn]) AA = np.empty([seq_length, nn]) bb = np.empty([num_samples, seq_length, 1]) print('Pre-process training data...') for j in range(num_samples): # display batch progress model_bigan.display_batch_progression(j, num_samples) bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1,1]) for i in range(nn): AA[:, i] = samples[(j * seq_step):(j * seq_step + seq_length), i] aa[j, :, :] = np.cov(AA.T) samples = aa labels = bb return samples, labels def swat_test(seq_length, seq_step, num_signals, randomize=False): """ Load and serialise """ # test = np.load('./data/swat_a.npy') # print('Loaded swat_a from .npy') test = np.loadtxt(open('./data/swat_a.csv'), delimiter=',') 
print('Loaded swat_a from .csv') m, n = test.shape # m1=449919, n1=52 for i in range(n - 1): B = max(test[:, i]) if B != 0: test[:, i] /= max(test[:, i]) # scale from -1 to 1 test[:, i] = 2 * test[:, i] - 1 else: test[:, i] = test[:, i] samples = test[:, 0:n - 1] labels = test[:, n - 1] idx = np.asarray(list(range(0, m))) # record the idx of each point ############################# # -- choose variable for uni-variate GAN-AD -- # # samples = samples[:, [1,2,3,4]] # samples_a = samples_a[:, [1,2,3,4]] ############################ ############################ # -- apply PCA dimension reduction for multi-variate GAN-AD -- # from sklearn.decomposition import PCA import DR_discriminator as dr # ALL SENSORS IDX # XS = [0, 1, 5, 6, 7, 8, 16, 17, 18, 25, 26, 27, 28, 33, 34, 35, 36, 37, 38, 39, 40, 41, 44, 45, 46, 47] # X_n = samples[:, XS] # X_a = samples_a[:, XS] # All VARIABLES X_a = samples #################################### ################################### # -- the best PC dimension is chosen pc=5 -- # n_components = num_signals pca_a = PCA(n_components, svd_solver='full') pca_a.fit(X_a) pc_a = pca_a.components_ # projected values on the principal component T_a = np.matmul(X_a, pc_a.transpose(1, 0)) samples = T_a # # only for one-dimensional # samples = T_a.reshape([samples.shape[0], ]) ########################################### ########################################### num_samples_t = (samples.shape[0] - seq_length) // seq_step aa = np.empty([num_samples_t, seq_length, num_signals]) bb = np.empty([num_samples_t, seq_length, 1]) bbb = np.empty([num_samples_t, seq_length, 1]) for j in range(num_samples_t): bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1, 1]) bbb[j, :, :] = np.reshape(idx[(j * seq_step):(j * seq_step + seq_length)], [-1, 1]) for i in range(num_signals): aa[j, :, i] = samples[(j * seq_step):(j * seq_step + seq_length), i] samples = aa labels = bb index = bbb return samples, labels, index def 
swat_birgan_test(seq_length, seq_step, num_signals, randomize=False): """ Load and serialise """ # train = np.load('./data/swat.npy') # print('Loaded swat from .npy') test = np.loadtxt(open('./data/swat_a.csv'), delimiter=',') print('Loaded swat_a from .csv') m, n = test.shape # m1=449919, n1=52 for i in range(n - 1): B = max(test[:, i]) if B != 0: test[:, i] /= max(test[:, i]) # scale from -1 to 1 test[:, i] = 2 * test[:, i] - 1 else: test[:, i] = test[:, i] samples = test[:, 0:n - 1] labels = test[:, n - 1] # idx = np.asarray(list(range(0, m))) # record the idx of each point ############################# # # -- choose variable for uni-variate GAN-AD -- # # # samples = samples[:, [1, 8, 18, 28]] ########################################### ########################################### nn = samples.shape[1] num_samples = (samples.shape[0]-seq_length)//seq_step aa = np.empty([num_samples, nn, nn]) AA = np.empty([seq_length, nn]) bb = np.empty([num_samples, seq_length, 1]) print('Pre-process testing data...') for j in range(num_samples): # display batch progress model_bigan.display_batch_progression(j, num_samples) bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1,1]) for i in range(nn): AA[:, i] = samples[(j * seq_step):(j * seq_step + seq_length), i] aa[j, :, :] = np.cov(AA.T) samples = aa labels = bb return samples, labels def wadi(seq_length, seq_step, num_signals, randomize=False): train = np.load('./data/wadi.npy') print('Loaded wadi from .npy') m, n = train.shape # m=1048571, n=119 for i in range(n-1): A = max(train[:, i]) if A != 0: train[:, i] /= max(train[:, i]) # scale from -1 to 1 train[:, i] = 2 * train[:, i] - 1 else: train[:, i] = train[:, i] samples = train[259200:, 0:n-1] # normal labels = train[259200:, n-1] ############################# samples = samples[:, [0, 3, 6, 17]] # samples = samples[:, 0] ############################ # # -- apply PCA dimension reduction for multi-variate GAN-AD -- # # from sklearn.decomposition 
import PCA # import DR_discriminator as dr # X_n = samples # #################################### # ################################### # # -- the best PC dimension is chosen pc=8 -- # # n_components = num_signals # pca = PCA(n_components, svd_solver='full') # pca.fit(X_n) # pc = pca.components_ # # projected values on the principal component # T_n = np.matmul(X_n, pc.transpose(1, 0)) # # samples = T_n # # # only for one-dimensional # # samples = T_n.reshape([samples.shape[0], ]) ########################################### ########################################### seq_length = 10800 num_samples = (samples.shape[0] - seq_length) // seq_step print("num_samples:", num_samples) print("num_signals:", num_signals) aa = np.empty([num_samples, seq_length, num_signals]) bb = np.empty([num_samples, seq_length, 1]) for j in range(num_samples): bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1, 1]) # aa[j, :, :] = np.reshape(samples[(j * seq_step):(j * seq_step + seq_length)], [-1, 1]) for i in range(num_signals): aa[j, :, i] = samples[(j * seq_step):(j * seq_step + seq_length), i] samples = aa[:, 0:10800:300, :] labels = bb[:, 0:10800:300, :] return samples, labels def wadi_test(seq_length, seq_step, num_signals, randomize=False): test = np.load('./data/wadi_a.npy') print('Loaded wadi_a from .npy') m, n = test.shape # m1=172801, n1=119 for i in range(n - 1): B = max(test[:, i]) if B != 0: test[:, i] /= max(test[:, i]) # scale from -1 to 1 test[:, i] = 2 * test[:, i] - 1 else: test[:, i] = test[:, i] samples = test[:, 0:n - 1] labels = test[:, n - 1] idx = np.asarray(list(range(0, m))) # record the idx of each point ############################# ############################ # -- apply PCA dimension reduction for multi-variate GAN-AD -- # from sklearn.decomposition import PCA import DR_discriminator as dr X_a = samples #################################### ################################### # -- the best PC dimension is chosen pc=8 -- # 
n_components = num_signals pca_a = PCA(n_components, svd_solver='full') pca_a.fit(X_a) pc_a = pca_a.components_ # projected values on the principal component T_a = np.matmul(X_a, pc_a.transpose(1, 0)) samples = T_a # # only for one-dimensional # samples = T_a.reshape([samples.shape[0], ]) ########################################### ########################################### num_samples_t = (samples.shape[0] - seq_length) // seq_step aa = np.empty([num_samples_t, seq_length, num_signals]) bb = np.empty([num_samples_t, seq_length, 1]) bbb = np.empty([num_samples_t, seq_length, 1]) for j in range(num_samples_t): bb[j, :, :] = np.reshape(labels[(j * 10):(j * seq_step + seq_length)], [-1, 1]) bbb[j, :, :] = np.reshape(idx[(j * seq_step):(j * seq_step + seq_length)], [-1, 1]) for i in range(num_signals): aa[j, :, i] = samples[(j * seq_step):(j * seq_step + seq_length), i] samples = aa labels = bb index = bbb return samples, labels, index def kdd99(seq_length, seq_step, num_signals): train = np.load('./data/kdd99_train.npy') print('load kdd99_train from .npy') m, n = train.shape # m=562387, n=35 # normalization for i in range(n - 1): # print('i=', i) A = max(train[:, i]) # print('A=', A) if A != 0: train[:, i] /= max(train[:, i]) # scale from -1 to 1 train[:, i] = 2 * train[:, i] - 1 else: train[:, i] = train[:, i] samples = train[:, 0:n - 1] labels = train[:, n - 1] # the last colummn is label ############################# ############################ # -- apply PCA dimension reduction for multi-variate GAN-AD -- # from sklearn.decomposition import PCA X_n = samples #################################### ################################### # -- the best PC dimension is chosen pc=6 -- # n_components = num_signals pca = PCA(n_components, svd_solver='full') pca.fit(X_n) ex_var = pca.explained_variance_ratio_ pc = pca.components_ # projected values on the principal component T_n = np.matmul(X_n, pc.transpose(1, 0)) samples = T_n # # only for one-dimensional # samples = 
T_n.reshape([samples.shape[0], ]) ########################################### ########################################### num_samples = (samples.shape[0] - seq_length) // seq_step aa = np.empty([num_samples, seq_length, num_signals]) bb = np.empty([num_samples, seq_length, 1]) for j in range(num_samples): bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1, 1]) for i in range(num_signals): aa[j, :, i] = samples[(j * seq_step):(j * seq_step + seq_length), i] samples = aa labels = bb return samples, labels def kdd99_test(seq_length, seq_step, num_signals): test = np.load('./data/kdd99_test.npy') print('load kdd99_test from .npy') m, n = test.shape # m1=494021, n1=35 for i in range(n - 1): B = max(test[:, i]) if B != 0: test[:, i] /= max(test[:, i]) # scale from -1 to 1 test[:, i] = 2 * test[:, i] - 1 else: test[:, i] = test[:, i] samples = test[:, 0:n - 1] labels = test[:, n - 1] idx = np.asarray(list(range(0, m))) # record the idx of each point ############################# ############################ # -- apply PCA dimension reduction for multi-variate GAN-AD -- # from sklearn.decomposition import PCA import DR_discriminator as dr X_a = samples #################################### ################################### # -- the best PC dimension is chosen pc=6 -- # n_components = num_signals pca_a = PCA(n_components, svd_solver='full') pca_a.fit(X_a) pc_a = pca_a.components_ # projected values on the principal component T_a = np.matmul(X_a, pc_a.transpose(1, 0)) samples = T_a # # only for one-dimensional # samples = T_a.reshape([samples.shape[0], ]) ########################################### ########################################### num_samples_t = (samples.shape[0] - seq_length) // seq_step aa = np.empty([num_samples_t, seq_length, num_signals]) bb = np.empty([num_samples_t, seq_length, 1]) bbb = np.empty([num_samples_t, seq_length, 1]) for j in range(num_samples_t): bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + 
seq_length)], [-1, 1]) bbb[j, :, :] = np.reshape(idx[(j * seq_step):(j * seq_step + seq_length)], [-1, 1]) for i in range(num_signals): aa[j, :, i] = samples[(j * seq_step):(j * seq_step + seq_length), i] samples = aa labels = bb index = bbb return samples, labels, index # ############################ data pre-processing ################################# # --- to do with loading --- # # --- to do with loading --- # def get_samples_and_labels(settings): """ Parse settings options to load or generate correct type of data, perform test/train split as necessary, and reform into 'samples' and 'labels' dictionaries. """ if settings['data_load_from']: data_path = './experiments/data/' + settings['data_load_from'] + '.data.npy' print('Loading data from', data_path) samples, pdf, labels = get_data('load', data_path) train, vali, test = samples['train'], samples['vali'], samples['test'] train_labels, vali_labels, test_labels = labels['train'], labels['vali'], labels['test'] del samples, labels else: # generate the data data_vars = ['num_samples', 'num_samples_t','seq_length', 'seq_step', 'num_signals', 'freq_low', 'freq_high', 'amplitude_low', 'amplitude_high', 'scale', 'full_mnist'] data_settings = dict((k, settings[k]) for k in data_vars if k in settings.keys()) samples, pdf, labels = get_data(settings['data'], settings['seq_length'], settings['seq_step'], settings['num_signals'], settings['sub_id']) if 'multivariate_mnist' in settings and settings['multivariate_mnist']: seq_length = samples.shape[1] samples = samples.reshape(-1, int(np.sqrt(seq_length)), int(np.sqrt(seq_length))) if 'normalise' in settings and settings['normalise']: # TODO this is a mess, fix print(settings['normalise']) norm = True else: norm = False if labels is None: train, vali, test = split(samples, [0.6, 0.2, 0.2], normalise=norm) train_labels, vali_labels, test_labels = None, None, None else: train, vali, test, labels_list = split(samples, [0.6, 0.2, 0.2], normalise=norm, labels=labels) 
train_labels, vali_labels, test_labels = labels_list labels = dict() labels['train'], labels['vali'], labels['test'] = train_labels, vali_labels, test_labels samples = dict() samples['train'], samples['vali'], samples['test'] = train, vali, test # futz around with labels # TODO refactor cause this is messy if 'one_hot' in settings and settings['one_hot'] and not settings['data_load_from']: if len(labels['train'].shape) == 1: # ASSUME labels go from 0 to max_val inclusive, find max-val max_val = int(np.max([labels['train'].max(), labels['test'].max(), labels['vali'].max()])) # now we have max_val + 1 dimensions print('Setting cond_dim to', max_val + 1, 'from', settings['cond_dim']) settings['cond_dim'] = max_val + 1 print('Setting max_val to 1 from', settings['max_val']) settings['max_val'] = 1 labels_oh = dict() for (k, v) in labels.items(): A = np.zeros(shape=(len(v), settings['cond_dim'])) A[np.arange(len(v)), (v).astype(int)] = 1 labels_oh[k] = A labels = labels_oh else: assert settings['max_val'] == 1 # this is already one-hot! if 'predict_labels' in settings and settings['predict_labels']: samples, labels = data_utils.make_predict_labels(samples, labels) print('Setting cond_dim to 0 from', settings['cond_dim']) settings['cond_dim'] = 0 # update the settings dictionary to update erroneous settings # (mostly about the sequence length etc. - it gets set by the data!) settings['seq_length'] = samples['train'].shape[1] settings['num_samples'] = samples['train'].shape[0] + samples['vali'].shape[0] + samples['test'].shape[0] settings['num_signals'] = samples['train'].shape[2] return samples, pdf, labels def get_data(data_type, seq_length, seq_step, num_signals, sub_id, eval_single, eval_an, data_options=None): """ Helper/wrapper function to get the requested data. 
def split(samples, proportions, normalise=False, scale=False, labels=None, random_seed=None):
    """
    Return a shuffled train/validation/test split of `samples`.

    Args:
        samples: array indexed by sample along axis 0; must expose
            `.shape` and accept integer-array indexing.
        proportions: three fractions (train, validation, test) summing
            to 1 up to floating-point rounding.
        normalise: if true, pass the three parts through `normalise_data`.
        scale: if true, pass the three parts through `scale_data`;
            mutually exclusive with `normalise`.
        labels: optional labels split with the same indices; either an
            array or a dict mapping a name to an array.
        random_seed: if not None, seeds both `random` and `np.random`
            before shuffling.

    Returns:
        (train, vali, test) when `labels` is None, otherwise
        (train, vali, test, labels_split), where `labels_split` is a
        [train, vali, test] list (array labels) or a dict of such lists
        (dict labels).

    Raises:
        AssertionError: if `proportions` does not sum to 1.
        ValueError: if both `normalise` and `scale` are set, or if
            `labels` is neither an array nor a dict.
    """
    if random_seed is not None:
        random.seed(random_seed)
        np.random.seed(random_seed)
    # exact equality rejects valid inputs: 0.7 + 0.2 + 0.1 != 1.0 in floats
    assert np.isclose(np.sum(proportions), 1.0), proportions

    n_total = samples.shape[0]
    n_train = min(ceil(n_total * proportions[0]), n_total)
    # both sizes are rounded up, so together they can overshoot n_total;
    # cap the test size so the validation size never goes negative
    n_test = min(ceil(n_total * proportions[2]), n_total - n_train)
    n_vali = n_total - (n_train + n_test)

    # permutation to shuffle the samples; the three slices partition it,
    # so the index sets are disjoint by construction
    shuff = np.random.permutation(n_total)
    train_indices = shuff[:n_train]
    vali_indices = shuff[n_train:(n_train + n_vali)]
    test_indices = shuff[(n_train + n_vali):]

    def take(array):
        # apply the same three index sets to any array aligned with samples
        return [array[train_indices], array[vali_indices], array[test_indices]]

    train, vali, test = take(samples)

    # apply the same normalisation scheme to all parts of the split
    if normalise:
        if scale:
            raise ValueError(normalise, scale)  # mutually exclusive
        train, vali, test = normalise_data(train, vali, test)
    elif scale:
        train, vali, test = scale_data(train, vali, test)

    if labels is None:
        return train, vali, test

    print('Splitting labels...')
    if isinstance(labels, np.ndarray):
        labels_split = take(labels)
    elif isinstance(labels, dict):
        # more than one set of labels! (weird case)
        labels_split = {label_name: take(label_set)
                        for (label_name, label_set) in labels.items()}
    else:
        raise ValueError(type(labels))
    return train, vali, test, labels_split
def compute_sanitized_gradients(self, loss, var_list=None, add_noise=True):
  """Compute the sanitized gradients.

  The loss is unstacked along axis 0 and differentiated slice by slice, so
  that the sanitizer receives, for every variable, the list of per-slice
  gradients (presumably one slice per example -- confirm against callers).

  Args:
    loss: the loss tensor, with the slices to differentiate along axis 0.
    var_list: the optional variables. Defaults to all trainable variables,
      as `minimize` does.
    add_noise: if true, the sanitizer also adds noise; whether it clips is
      decided by the sanitizer's clip options.

  Returns:
    a list of sanitized gradients, one per entry of var_list.
  """
  self._assert_valid_dtypes([loss])
  if var_list is None:
    # The default used to fail with "'NoneType' object is not iterable".
    var_list = tf.trainable_variables()

  xs = [tf.convert_to_tensor(x) for x in var_list]
  # TODO check this change
  loss_list = tf.unstack(loss, axis=0)
  px_grads_byexample = [tf.gradients(l, xs) for l in loss_list]
  # Transpose from [slice][variable] to [variable][slice].
  px_grads = [[x[v] for x in px_grads_byexample] for v in range(len(xs))]

  sanitized_grads = []
  for px_grad, v in zip(px_grads, var_list):
    tensor_name = utils.GetTensorOpName(v)
    sanitized_grad = self._sanitizer.sanitize(
        px_grad, self._eps_delta, sigma=self._sigma,
        tensor_name=tensor_name, add_noise=add_noise,
        num_examples=self._batches_per_lot * tf.slice(
            tf.shape(px_grad), [0], [1]))
    sanitized_grads.append(sanitized_grad)

  return sanitized_grads
# This may lead to smaller error if self._batches_per_lot == 1: sanitized_grads = self.compute_sanitized_gradients( loss, var_list=var_list) grads_and_vars = list(zip(sanitized_grads, var_list)) self._assert_valid_dtypes([v for g, v in grads_and_vars if g is not None]) apply_grads = self.apply_gradients(grads_and_vars, global_step=global_step, name=name) return apply_grads # Condition for deciding whether to accumulate the gradient # or actually apply it. # we use a private self_batch_count to keep track of number of batches. # global step will count number of lots processed. update_cond = tf.equal(tf.constant(0), tf.mod(self._batch_count, tf.constant(self._batches_per_lot))) # Things to do for batches other than last of the lot. # Add non-noisy clipped grads to shadow variables. def non_last_in_lot_op(loss, var_list): """Ops to do for a typical batch. For a batch that is not the last one in the lot, we simply compute the sanitized gradients and apply them to the grad_acc variables. Args: loss: loss function tensor var_list: list of variables Returns: A tensorflow op to do the updates to the gradient accumulators """ sanitized_grads = self.compute_sanitized_gradients( loss, var_list=var_list, add_noise=False) update_ops_list = [] for var, grad in zip(var_list, sanitized_grads): grad_acc_v = self._grad_accum_dict[var.name] update_ops_list.append(grad_acc_v.assign_add(grad)) update_ops_list.append(self._batch_count.assign_add(1)) return tf.group(*update_ops_list) # Things to do for last batch of a lot. # Add noisy clipped grads to accumulator. # Apply accumulated grads to vars. def last_in_lot_op(loss, var_list, global_step): """Ops to do for last batch in a lot. 
For the last batch in the lot, we first add the sanitized gradients to the gradient acc variables, and then apply these values over to the original variables (via an apply gradient) Args: loss: loss function tensor var_list: list of variables global_step: optional global step to be passed to apply_gradients Returns: A tensorflow op to push updates from shadow vars to real vars. """ # We add noise in the last lot. This is why we need this code snippet # that looks almost identical to the non_last_op case here. sanitized_grads = self.compute_sanitized_gradients( loss, var_list=var_list, add_noise=True) normalized_grads = [] for var, grad in zip(var_list, sanitized_grads): grad_acc_v = self._grad_accum_dict[var.name] # To handle the lr difference per lot vs per batch, we divide the # update by number of batches per lot. normalized_grad = tf.div(grad_acc_v.assign_add(grad), tf.to_float(self._batches_per_lot)) normalized_grads.append(normalized_grad) with tf.control_dependencies(normalized_grads): grads_and_vars = list(zip(normalized_grads, var_list)) self._assert_valid_dtypes( [v for g, v in grads_and_vars if g is not None]) apply_san_grads = self.apply_gradients(grads_and_vars, global_step=global_step, name="apply_grads") # Now reset the accumulators to zero resets_list = [] with tf.control_dependencies([apply_san_grads]): for _, acc in self._grad_accum_dict.items(): reset = tf.assign(acc, tf.zeros_like(acc)) resets_list.append(reset) resets_list.append(self._batch_count.assign_add(1)) last_step_update = tf.group(*([apply_san_grads] + resets_list)) return last_step_update # pylint: disable=g-long-lambda update_op = tf.cond(update_cond, lambda: last_in_lot_op( loss, var_list, global_step), lambda: non_last_in_lot_op( loss, var_list)) return tf.group(update_op) ================================================ FILE: differential_privacy/dp_sgd/dp_optimizer/sanitizer.py ================================================ # (originally from 
class AmortizedGaussianSanitizer(object):
  """Sanitizer with Gaussian noise and amortized privacy spending accounting.

  This sanitizes a tensor by first clipping the tensor, summing the tensor
  along dimension 0 and then adding an appropriate amount of noise. It also
  reports every noisy release to the supplied accountant so that the privacy
  spending is tracked.
  """

  def __init__(self, accountant, default_option):
    """Construct an AmortizedGaussianSanitizer.

    Args:
      accountant: the privacy accountant. Expect an amortized one.
      default_option: the default ClipOption, used when `sanitize` is called
        without an explicit option.
    """

    self._accountant = accountant
    self._default_option = default_option
    # Per-tensor overrides registered through set_option, keyed by name.
    self._options = {}

  def set_option(self, tensor_name, option):
    """Set options for an individual tensor.

    Args:
      tensor_name: the name of the tensor.
      option: clip option.
    """

    self._options[tensor_name] = option

  def sanitize(self, x, eps_delta, sigma=None,
               option=ClipOption(None, None), tensor_name=None,
               num_examples=None, add_noise=True):
    """Sanitize the given tensor.

    This sanitizes a given tensor by first applying l2 norm clipping and then
    adding Gaussian noise. It calls the privacy accountant for updating the
    privacy spending.

    Args:
      x: the tensor to sanitize.
      eps_delta: a pair of eps, delta for (eps,delta)-DP. Use it to
        compute sigma if sigma is None.
      sigma: if sigma is not None, use sigma.
      option: a ClipOption which, if supplied, used for clipping and adding
        noise.
      tensor_name: the name of the tensor.
      num_examples: if None, use the number of "rows" of x.
      add_noise: if True, then add noise, else just clip.
    Returns:
      the sanitized tensor: x (clipped if the option asks for it) summed over
      dimension 0, plus Gaussian noise when add_noise is True.
    """

    if sigma is None:
      # pylint: disable=unpacking-non-sequence
      eps, delta = eps_delta
      with tf.control_dependencies(
          [tf.Assert(tf.greater(eps, 0),
                     ["eps needs to be greater than 0"]),
           tf.Assert(tf.greater(delta, 0),
                     ["delta needs to be greater than 0"])]):
        # The following formula is taken from
        #   Dwork and Roth, The Algorithmic Foundations of Differential
        #   Privacy, Appendix A.
        #   http://www.cis.upenn.edu/~aaroth/Papers/privacybook.pdf
        sigma = tf.sqrt(2.0 * tf.log(1.25 / delta)) / eps

    # NOTE(review): the nesting below (per-tensor override consulted only
    # when no explicit option was passed) is reconstructed from a
    # whitespace-collapsed listing -- confirm against the upstream file.
    l2norm_bound, clip = option
    if l2norm_bound is None:
      l2norm_bound, clip = self._default_option
      if ((tensor_name is not None) and
          (tensor_name in self._options)):
        l2norm_bound, clip = self._options[tensor_name]
    if clip:
      x = utils.BatchClipByL2norm(x, l2norm_bound)

    if add_noise:
      if num_examples is None:
        num_examples = tf.slice(tf.shape(x), [0], [1])
      privacy_accum_op = self._accountant.accumulate_privacy_spending(
          eps_delta, sigma, num_examples)
      # Tie the noisy sum to the accounting op so that the spending is
      # recorded whenever the sanitized value is evaluated.
      with tf.control_dependencies([privacy_accum_op]):
        saned_x = utils.AddGaussianNoise(tf.reduce_sum(x, 0),
                                         sigma * l2norm_bound)
    else:
      saned_x = tf.reduce_sum(x, 0)
    return saned_x
class LayerParameters(object):
  """class that defines a non-conv layer.

  `with_bias` is the public flag that BuildNetwork reads. It used to be
  initialised only under the private name `_with_bias`, so a layer left at
  its defaults raised AttributeError in BuildNetwork. The private spelling
  is kept as an alias for code that may still use it.
  """

  def __init__(self):
    self.name = ""
    self.num_units = 0
    self.with_bias = False
    self.relu = False
    self.gradient_l2norm_bound = 0.0
    self.bias_gradient_l2norm_bound = 0.0
    self.trainable = True
    self.weight_decay = 0.0

  @property
  def _with_bias(self):
    # Backward-compatible alias of with_bias.
    return self.with_bias

  @_with_bias.setter
  def _with_bias(self, value):
    self.with_bias = value
""" training_parameters = {} num_inputs = network_parameters.input_size outputs = inputs projection = None # First apply convolutions, if needed for conv_param in network_parameters.conv_parameters: outputs = tf.reshape( outputs, [-1, conv_param.in_size, conv_param.in_size, conv_param.in_channels]) conv_weights_name = "%s_conv_weight" % (conv_param.name) conv_bias_name = "%s_conv_bias" % (conv_param.name) conv_std_dev = 1.0 / (conv_param.patch_size * math.sqrt(conv_param.in_channels)) conv_weights = tf.Variable( tf.truncated_normal([conv_param.patch_size, conv_param.patch_size, conv_param.in_channels, conv_param.out_channels], stddev=conv_std_dev), trainable=conv_param.trainable, name=conv_weights_name) conv_bias = tf.Variable( tf.truncated_normal([conv_param.out_channels], stddev=conv_param.bias_stddev), trainable=conv_param.trainable, name=conv_bias_name) training_parameters[conv_weights_name] = {} training_parameters[conv_bias_name] = {} conv = tf.nn.conv2d(outputs, conv_weights, strides=[1, conv_param.stride, conv_param.stride, 1], padding="SAME") relud = tf.nn.relu(conv + conv_bias) mpd = tf.nn.max_pool(relud, ksize=[1, conv_param.max_pool_size, conv_param.max_pool_size, 1], strides=[1, conv_param.max_pool_stride, conv_param.max_pool_stride, 1], padding="SAME") outputs = mpd num_inputs = conv_param.num_outputs # this should equal # in_size * in_size * out_channels / (stride * max_pool_stride) # once all the convs are done, reshape to make it flat outputs = tf.reshape(outputs, [-1, num_inputs]) # Now project, if needed if network_parameters.projection_type is not "NONE": projection = tf.Variable(tf.truncated_normal( [num_inputs, network_parameters.projection_dimensions], stddev=1.0 / math.sqrt(num_inputs)), trainable=False, name="projection") num_inputs = network_parameters.projection_dimensions outputs = tf.matmul(outputs, projection) # Now apply any other layers for layer_parameters in network_parameters.layer_parameters: num_units = 
def VaryRate(start, end, saturate_epochs, epoch):
  """Compute a linearly varying number.

  Decrease linearly from start to end until epoch saturate_epochs.

  Args:
    start: the initial number.
    end: the end number.
    saturate_epochs: after this we do not reduce the number; if less than or
      equal to zero, just return start.
    epoch: the current learning epoch.
  Returns:
    the calculated number.
  """
  if saturate_epochs <= 0:
    return start
  if not epoch < saturate_epochs:
    return end
  if saturate_epochs == 1:
    # A one-epoch schedule has no interior point to interpolate; the step
    # below would divide by zero.
    return start

  step = (start - end) / (saturate_epochs - 1)
  return start - step * epoch
def SoftThreshold(t, threshold_ratio, name=None):
  """Soft-threshold a tensor by the mean value.

  Soft-threshold each dimension-0 vector (for matrix it is each column) by
  the mean of absolute value multiplied by the threshold_ratio factor. Here
  we soft threshold each column as it corresponds to each unit in a layer.

  Args:
    t: the input tensor.
    threshold_ratio: the threshold ratio.
    name: the optional name for the returned tensor.
  Returns:
    the thresholded tensor, where each entry is soft-thresholded by
    threshold_ratio times the mean of the absolute value of each column.
  """

  assert threshold_ratio >= 0
  with tf.name_scope(values=[t, threshold_ratio], name=name,
                     default_name="soft_thresholding") as name:
    saved_shape = tf.shape(t)
    # Flatten to [dim0, -1]. The trailing -1 has to be a rank-1 piece for
    # tf.concat (a bare scalar cannot be concatenated), exactly as
    # BatchClipByL2norm builds the same shape.
    flat_shape = tf.concat(axis=0,
                           values=[tf.slice(saved_shape, [0], [1]), [-1]])
    t2 = tf.reshape(t, flat_shape)
    t_abs = tf.abs(t2)
    # Shrink magnitudes by ratio * column mean, clamping at zero, then
    # restore the sign.
    t_x = tf.sign(t2) * tf.nn.relu(t_abs -
                                   (tf.reduce_mean(t_abs, [0],
                                                   keep_dims=True) *
                                    threshold_ratio))
    return tf.reshape(t_x, saved_shape, name=name)
def GenerateBinomialTable(m):
  """Generate binomial table.

  Args:
    m: the size of the table.
  Returns:
    A two dimensional tensor T where T[i][j] = (i choose j),
    for 0 <= i, j <= m, stored as float64.
  """

  size = m + 1
  table = numpy.zeros((size, size), dtype=numpy.float64)
  # (i choose 0) = 1 for every row.
  table[:, :1] = 1
  # Pascal's rule, one whole row at a time:
  #   (i choose j) = (i-1 choose j) + (i-1 choose j-1)
  for row in range(1, size):
    above = table[row - 1]
    entries = above[1:] + above[:-1]
    assert numpy.isfinite(entries).all()
    table[row, 1:] = entries
  return tf.convert_to_tensor(table)
EpsDelta = collections.namedtuple("EpsDelta", ["spent_eps", "spent_delta"])


class AmortizedAccountant(object):
  """Keep track of privacy spending in an amortized way.

  AmortizedAccountant accumulates the privacy spending by assuming
  all the examples are processed uniformly at random so the spending is
  amortized among all the examples. And we assume that we use Gaussian noise
  so the accumulation is on eps^2 and delta, using advanced composition.
  """

  def __init__(self, total_examples):
    """Initialization. Currently only support amortized tracking.

    Args:
      total_examples: total number of examples.
    """

    assert total_examples > 0
    self._total_examples = total_examples
    # Running totals, each held in a non-trainable variable of shape [1].
    self._eps_squared_sum = tf.Variable(tf.zeros([1]), trainable=False,
                                        name="eps_squared_sum")
    self._delta_sum = tf.Variable(tf.zeros([1]), trainable=False,
                                  name="delta_sum")

  def accumulate_privacy_spending(self, eps_delta, unused_sigma,
                                  num_examples):
    """Accumulate the privacy spending.

    Currently only support approximate privacy. Here we assume we use Gaussian
    noise on randomly sampled batch so we get better composition: 1. the per
    batch privacy is computed using privacy amplification via sampling bound;
    2. the composition is done using the composition with Gaussian noise.
    TODO(liqzhang) Add a link to a document that describes the bounds used.

    Args:
      eps_delta: EpsDelta pair which can be tensors.
      unused_sigma: the noise sigma. Unused for this accountant.
      num_examples: the number of examples involved.
    Returns:
      a TensorFlow operation for updating the privacy spending.
    """

    eps, delta = eps_delta
    with tf.control_dependencies(
        [tf.Assert(tf.greater(delta, 0),
                   ["delta needs to be greater than 0"])]):
      amortize_ratio = (tf.cast(num_examples, tf.float32) * 1.0 /
                        self._total_examples)
      # Use privacy amplification via sampling bound.
      # See Lemma 2.2 in http://arxiv.org/pdf/1405.7085v2.pdf
      # TODO(liqzhang) Add a link to a document with formal statement
      # and proof.
      amortize_eps = tf.reshape(tf.log(1.0 + amortize_ratio * (
          tf.exp(eps) - 1.0)), [1])
      amortize_delta = tf.reshape(amortize_ratio * delta, [1])
      return tf.group(*[tf.assign_add(self._eps_squared_sum,
                                      tf.square(amortize_eps)),
                        tf.assign_add(self._delta_sum, amortize_delta)])

  def get_privacy_spent(self, sess, target_eps=None):
    """Report the spending so far.

    Args:
      sess: the session to run the tensor.
      target_eps: the target epsilon. Unused.
    Returns:
      the list containing a single EpsDelta, with values as Python floats (as
      opposed to numpy.float64). This is to be consistent with
      MomentAccountant which can return a list of (eps, delta) pair.
    """

    # pylint: disable=unused-argument
    unused_target_eps = target_eps
    eps_squared_sum, delta_sum = sess.run([self._eps_squared_sum,
                                           self._delta_sum])
    # The accumulators come back as one-element arrays. Extract the scalar
    # explicitly: implicit conversion of a non-0-d array by math.sqrt/float
    # is deprecated in recent NumPy releases.
    eps_squared_sum = numpy.asarray(eps_squared_sum).item()
    delta_sum = numpy.asarray(delta_sum).item()
    return [EpsDelta(math.sqrt(eps_squared_sum), float(delta_sum))]
It requires a method for computing differenital moments of the noise (See below for the definition). So every specific accountant should subclass this class by implementing _differential_moments method. Denote by X_i the random variable of privacy loss at the i-th step. Consider two databases D, D' which differ by one item. X_i takes value log Pr[M(D')==x]/Pr[M(D)==x] with probability Pr[M(D)==x]. In MomentsAccountant, we keep track of y_i(L) = log E[exp(L X_i)] for some large enough L. To compute the final privacy spending, we apply Chernoff bound (assuming the random noise added at each step is independent) to bound the total privacy loss Z = sum X_i as follows: Pr[Z > e] = Pr[exp(L Z) > exp(L e)] < E[exp(L Z)] / exp(L e) = Prod_i E[exp(L X_i)] / exp(L e) = exp(sum_i log E[exp(L X_i)]) / exp(L e) = exp(sum_i y_i(L) - L e) Hence the mechanism is (e, d)-differentially private for d = exp(sum_i y_i(L) - L e). We require d < 1, i.e. e > sum_i y_i(L) / L. We maintain y_i(L) for several L to compute the best d for any give e (normally should be the lowest L such that 2 * sum_i y_i(L) / L < e. We further assume that at each step, the mechanism operates on a random sample with sampling probability q = batch_size / total_examples. Then E[exp(L X)] = E[(Pr[M(D)==x / Pr[M(D')==x])^L] By distinguishing two cases of whether D < D' or D' < D, we have that E[exp(L X)] <= max (I1, I2) where I1 = (1-q) E ((1-q) + q P(X+1) / P(X))^L + q E ((1-q) + q P(X) / P(X-1))^L I2 = E (P(X) / ((1-q) + q P(X+1)))^L In order to compute I1 and I2, one can consider to 1. use an asymptotic bound, which recovers the advance composition theorem; 2. use the closed formula (like GaussianMomentsAccountant); 3. use numerical integration or random sample estimation. Dependent on the distribution, we can often obtain a tigher estimation on the moments and hence a more accurate estimation of the privacy loss than obtained using generic composition theorems. 
def __init__(self, total_examples, moment_orders=32):
  """Initialize a MomentsAccountant.

  Args:
    total_examples: total number of examples; must be positive.
    moment_orders: the orders of moments to keep. Either an explicit list or
      tuple of orders, or a single number N meaning the orders 1..N.
  """

  assert total_examples > 0
  self._total_examples = total_examples

  if isinstance(moment_orders, (list, tuple)):
    orders = moment_orders
  else:
    orders = range(1, moment_orders + 1)
  self._moment_orders = orders

  self._max_moment_order = max(orders)
  assert self._max_moment_order < 100, "The moment order is too large."

  # One float64 accumulator variable per tracked moment order.
  log_moments = []
  for moment_order in orders:
    log_moments.append(tf.Variable(numpy.float64(0.0),
                                   trainable=False,
                                   name=("log_moments-%d" % moment_order)))
  self._log_moments = log_moments
""" q = tf.cast(num_examples, tf.float64) * 1.0 / self._total_examples moments_accum_ops = [] for i in range(len(self._log_moments)): moment = self._compute_log_moment(sigma, q, self._moment_orders[i]) moments_accum_ops.append(tf.assign_add(self._log_moments[i], moment)) return tf.group(*moments_accum_ops) def _compute_delta(self, log_moments, eps): """Compute delta for given log_moments and eps. Args: log_moments: the log moments of privacy loss, in the form of pairs of (moment_order, log_moment) eps: the target epsilon. Returns: delta """ min_delta = 1.0 for moment_order, log_moment in log_moments: if math.isinf(log_moment) or math.isnan(log_moment): sys.stderr.write("The %d-th order is inf or Nan\n" % moment_order) continue if log_moment < moment_order * eps: min_delta = min(min_delta, math.exp(log_moment - moment_order * eps)) return min_delta def _compute_eps(self, log_moments, delta): min_eps = float("inf") for moment_order, log_moment in log_moments: if math.isinf(log_moment) or math.isnan(log_moment): sys.stderr.write("The %d-th order is inf or Nan\n" % moment_order) continue min_eps = min(min_eps, (log_moment - math.log(delta)) / moment_order) return min_eps def get_privacy_spent(self, sess, target_eps=None, target_deltas=None): """Compute privacy spending in (e, d)-DP form for a single or list of eps. Args: sess: the session to run the tensor. target_eps: a list of target epsilon's for which we would like to compute corresponding delta value. target_deltas: a list of target deltas for which we would like to compute the corresponding eps value. Caller must specify either target_eps or target_delta. Returns: A list of EpsDelta pairs. 
""" assert (target_eps is None) ^ (target_deltas is None) eps_deltas = [] log_moments = sess.run(self._log_moments) log_moments_with_order = numpy.array(list(zip(self._moment_orders, log_moments))) if target_eps is not None: for eps in target_eps: delta = self._compute_delta(log_moments_with_order, eps) eps_deltas.append(EpsDelta(eps, delta)) else: assert target_deltas for delta in target_deltas: eps_deltas.append( EpsDelta(self._compute_eps(log_moments_with_order, delta), delta)) return eps_deltas class GaussianMomentsAccountant(MomentsAccountant): """MomentsAccountant which assumes Gaussian noise. GaussianMomentsAccountant assumes the noise added is centered Gaussian noise N(0, sigma^2 I). In this case, we can compute the differential moments accurately using a formula. For asymptotic bound, for Gaussian noise with variance sigma^2, we can show for L < sigma^2, q L < sigma, log E[exp(L X)] = O(q^2 L^2 / sigma^2). Using this we derive that for training T epoches, with batch ratio q, the Gaussian mechanism with variance sigma^2 (with q < 1/sigma) is (e, d) private for d = exp(T/q q^2 L^2 / sigma^2 - L e). Setting L = sigma^2, Tq = e/2, the mechanism is (e, exp(-e sigma^2/2))-DP. Equivalently, the mechanism is (e, d)-DP if sigma = sqrt{2 log(1/d)}/e, q < 1/sigma, and T < e/(2q). This bound is better than the bound obtained using general composition theorems, by an Omega(sqrt{log k}) factor on epsilon, if we run k steps. Since we use direct estimate, the obtained privacy bound has tight constant. For GaussianMomentAccountant, it suffices to compute I1, as I1 >= I2, which reduce to computing E(P(x+s)/P(x+s-1) - 1)^i for s = 0 and 1. In the companion gaussian_moments.py file, we supply procedure for computing both I1 and I2 (the computation of I2 is through multi-precision integration package). It can be verified that indeed I1 >= I2 for wide range of parameters we have tried, though at the moment we are unable to prove this claim. 
def __init__(self, total_examples, moment_orders=32):
  """Initialization.

  Args:
    total_examples: total number of examples.
    moment_orders: the order of moments to keep; forwarded unchanged to
      MomentsAccountant.__init__.
  """
  # Name the class explicitly. super(self.__class__, self) resolves against
  # the most-derived type, so any subclass of GaussianMomentsAccountant
  # would re-enter this very __init__ until the recursion limit is hit.
  super(GaussianMomentsAccountant, self).__init__(total_examples,
                                                  moment_orders)
  # Binomial coefficients up to the largest tracked moment order; sliced by
  # the moment computations of this class.
  self._binomial_table = utils.GenerateBinomialTable(self._max_moment_order)
def _compute_log_moment(self, sigma, q, moment_order):
  """Compute high moment of privacy loss.

  Builds the TensorFlow ops for
    log(q * sum_i C(L, i) q^i m0[i] + (1 - q) * sum_i C(L, i) q^i m1[i])
  where L = moment_order and m0 / m1 are the differential moments returned
  by _differential_moments for shift 0 and shift 1.

  Args:
    sigma: the noise sigma, in the multiples of the sensitivity.
    q: the sampling ratio.
    moment_order: the order of moment.
  Returns:
    log E[exp(moment_order * X)]
  """
  order_limit = self._max_moment_order
  assert moment_order <= order_limit, (
      "The order of %d is out of the upper bound %d."
      % (moment_order, order_limit))

  n_terms = moment_order + 1
  # Row `moment_order` of the binomial table: C(L, 0) ... C(L, L).
  binom_row = tf.slice(self._binomial_table, [moment_order, 0], [1, n_terms])

  # q_powers = [1 q q^2 ... q^L] = exp([0 1 2 ... L] * log(q))
  exponents = tf.constant([float(k) for k in range(n_terms)],
                          dtype=tf.float64)
  log_q = tf.cast(tf.log(q), dtype=tf.float64)
  q_powers = tf.exp(exponents * log_q)

  # One weighted sum per shift, built in the order shift 0 then shift 1.
  weighted_sums = []
  for shift in (0.0, 1.0):
    diff_moments = self._differential_moments(sigma, shift, moment_order)
    weighted_sums.append(tf.reduce_sum(binom_row * q_powers * diff_moments))
  sum_shift0, sum_shift1 = weighted_sums

  mixture = q * sum_shift0 + (1.0 - q) * sum_shift1
  return tf.squeeze(tf.log(tf.cast(mixture, tf.float64)))
def MMD_3_Sample_Test(X, Y, Z, sigma=-1, SelectSigma=True, computeMMDs=False):
    '''Perform the relative MMD three-sample test.

    The statistic is t = MMD^2(X, Y) - MMD^2(X, Z) (the X-X term cancels),
    and the p-value is Phi(-t / sqrt(Var)), so a small p-value is evidence
    against the null hypothesis MMD(X, Y) <= MMD(X, Z).
    See http://arxiv.org/pdf/1511.04581.pdf

    Args:
        X, Y, Z: sample arrays, indexed by sample along the first axis.
        sigma: RBF kernel bandwidth. A negative value (the default) selects
            the bandwidth with a median heuristic (see Smola, Gretton).
        SelectSigma: only used when sigma < 0. If True, average the pairwise
            heuristics of (X, Y) and (X, Z); otherwise apply the heuristic
            to the pooled leading samples of X, Y and Z.
        computeMMDs: if True, also return the two unbiased MMD^2 estimates.

    Returns:
        (pvalue, tstat, sigma, MMDXY, MMDXZ); MMDXY and MMDXZ are None
        unless computeMMDs is True.
    '''
    # Import the submodule explicitly: a bare `import scipy as sp` does not
    # by itself guarantee that `sp.stats` has been loaded.
    from scipy.stats import norm

    if sigma < 0:
        # No bandwidth supplied: fall back to a median heuristic on at most
        # 1000 samples.
        if SelectSigma:
            siz = np.min((1000, X.shape[0]))
            sigma1 = kernelwidthPair(X[0:siz], Y[0:siz])
            sigma2 = kernelwidthPair(X[0:siz], Z[0:siz])
            sigma = (sigma1 + sigma2) / 2.
        else:
            siz = np.min((1000, X.shape[0] * 3))
            # Floor division: `siz / 3` is a float on Python 3 and a float
            # slice bound raises TypeError.
            third = siz // 3
            Zem = np.r_[X[0:third], Y[0:third], Z[0:third]]
            sigma = kernelwidth(Zem)

    kernel = partial(my_kernel, sigma=sigma)

    Kyy = kernel(Y, Y)
    Kzz = kernel(Z, Z)
    Kxy = kernel(X, Y)
    Kxz = kernel(X, Z)
    # Zero the diagonals so the within-sample terms are unbiased.
    Kyynd = Kyy - np.diag(np.diagonal(Kyy))
    Kzznd = Kzz - np.diag(np.diagonal(Kzz))
    m = Kxy.shape[0]
    n = Kyy.shape[0]
    r = Kzz.shape[0]

    u_yy = np.sum(Kyynd) * (1. / (n * (n - 1)))
    u_zz = np.sum(Kzznd) * (1. / (r * (r - 1)))
    u_xy = np.sum(Kxy) / (m * n)
    u_xz = np.sum(Kxz) / (m * r)

    # Compute the test statistic
    t = u_yy - 2. * u_xy - (u_zz - 2. * u_xz)
    Diff_Var, Diff_Var_z2, data = MMD_Diff_Var(Kyy, Kzz, Kxy, Kxz)

    pvalue = norm.cdf(-t / np.sqrt(Diff_Var))
    tstat = t / np.sqrt(Diff_Var)

    if computeMMDs:
        Kxx = kernel(X, X)
        Kxxnd = Kxx - np.diag(np.diagonal(Kxx))
        u_xx = np.sum(Kxxnd) * (1. / (m * (m - 1)))
        MMDXY = u_xx + u_yy - 2. * u_xy
        MMDXZ = u_xx + u_zz - 2. * u_xz
    else:
        MMDXY = None
        MMDXZ = None
    return pvalue, tstat, sigma, MMDXY, MMDXZ
def grbf(x1, x2, sigma):
    '''Gaussian RBF kernel matrix between the rows of x1 and the rows of x2.

    Entry [i, j] is exp(-||x1[i] - x2[j]||^2 / (2 * sigma^2)), with the
    squared distance expanded as ||a||^2 + ||b||^2 - 2 a.b.
    '''
    # Tuple-unpacking the shapes keeps the requirement that both inputs are
    # 2-D (anything else raises ValueError here).
    _rows1, _cols1 = x1.shape
    _rows2, _cols2 = x2.shape

    sq_norms_1 = np.sum(x1 * x1, 1)
    sq_norms_2 = np.sum(x2 * x2, 1)

    # Broadcast a column against a row instead of tiling both vectors.
    sq_dists = sq_norms_1[:, np.newaxis] + sq_norms_2[np.newaxis, :]
    sq_dists = sq_dists - 2 * np.dot(x1, x2.transpose())
    sq_dists = np.array(sq_dists, dtype=float)

    two_sigma_sq = 2. * pow(sigma, 2)
    return np.exp(-1. * sq_dists / two_sigma_sq)
def kernelwidth(Zmed):
    '''Alternative median heuristic when we can't partition the points.

    Computes all pairwise squared Euclidean distances between the rows of
    Zmed, takes the median of the non-zero ones and returns
    sigma = sqrt(median / 2).

    Args:
        Zmed: 2-D array with one point per row.

    Returns:
        The heuristic bandwidth, or 1 when it cannot be derived (no
        non-zero pairwise distance, e.g. a single point or identical
        points, or a zero result).
    '''
    sq_norms = np.sum(Zmed * Zmed, axis=1)
    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b, via broadcasting.
    h = sq_norms[:, np.newaxis] + sq_norms[np.newaxis, :] - 2. * Zmed.dot(Zmed.T)
    h = np.asarray(h, dtype=float)

    nonzero = h[h != 0]
    if nonzero.size == 0:
        # The median of an empty set is NaN, and NaN is truthy, so the
        # `not sigma` fallback below would never trigger on its own.
        return 1

    sigma = np.sqrt(np.median(nonzero) / 2.0)
    if not sigma:
        sigma = 1
    return sigma
def assert_same_data(A, B):
    """
    Check that two settings dicts refer to the same dataset and return the
    path prefix of that dataset under ./experiments/data/.

    A run either loaded its data (settings['data'] == 'load', source named
    by 'data_load_from') or created it under its own 'identifier'. At least
    one of the two runs must have loaded its data, otherwise ValueError is
    raised; a mismatch between the two runs fails an assertion.
    """
    a_loaded = A['data'] == 'load'
    b_loaded = B['data'] == 'load'

    if not a_loaded and not b_loaded:
        raise ValueError(A['data'], B['data'])

    if a_loaded:
        # B either loaded the same file, or is the run that produced it.
        b_key = 'data_load_from' if b_loaded else 'identifier'
        assert A['data_load_from'] == B[b_key]
        return './experiments/data/' + A['data_load_from']

    # Only B loaded: it must have loaded the data A created.
    assert B['data_load_from'] == A['identifier']
    return './experiments/data/' + A['identifier']
data_utils.scale_data(train, vali, test) n_samples = test.shape[0] if n_samples > max_samples: n_samples = max_samples test = np.random.permutation(test)[:n_samples] if model_samples.shape[0] > n_samples: model_samples = np.random.permutation(model_samples)[:n_samples] elif identifier == 'cristobal_MNIST': the_dir = 'REDACTED' # pick a random one which = np.random.choice(['NEW_OK_', '_r4', '_r5', '_r6', '_r7']) model_samples, model_labels = pickle.load(open(the_dir + 'synth_mnist_minist_cdgan_1_2_100_multivar_14_nolr_rdim3_0_2_' + which + '_190.pk', 'rb')) # get test and train... # (generated with fixed seed...) mnist_resized_dim = 14 samples, labels = data_utils.load_resized_mnist(mnist_resized_dim) proportions = [0.6, 0.2, 0.2] train, vali, test, labels_split = data_utils.split(samples, labels=labels, random_seed=1, proportions=proportions) np.random.seed() train = train.reshape(-1, 14, 14) test = test.reshape(-1, 14, 14) vali = vali.reshape(-1, 14, 14) n_samples = test.shape[0] if n_samples > max_samples: n_samples = max_samples test = np.random.permutation(test)[:n_samples] if model_samples.shape[0] > n_samples: model_samples = np.random.permutation(model_samples)[:n_samples] else: settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r')) # get the test, train sets data = np.load('./experiments/data/' + identifier + '.data.npy').item() train = data['samples']['train'] test = data['samples']['test'] n_samples = test.shape[0] if n_samples > max_samples: n_samples = max_samples test = np.random.permutation(test)[:n_samples] model_samples = model.sample_trained_model(settings, epoch, n_samples) all_samples = np.vstack([train, test, model_samples]) heuristic_sigma = mmd.median_pairwise_distance(all_samples) print('heuristic sigma:', heuristic_sigma) pvalue, tstat, sigma, MMDXY, MMDXZ = MMD_3_Sample_Test(model_samples, test, np.random.permutation(train)[:n_samples], sigma=heuristic_sigma, computeMMDs=False) #pvalue, tstat, sigma, MMDXY, MMDXZ = 
def model_comparison(identifier_A, identifier_B, epoch_A=99, epoch_B=99):
    """
    Compare two models using relative MMD test

    Loads the settings of both experiments, checks that they used the same
    data, samples as many sequences from each model as there are validation
    sequences, and runs the three-sample MMD test on the first signal.

    Args:
        identifier_A, identifier_B: experiment identifiers; their settings
            are read from ./experiments/settings/<identifier>.txt.
        epoch_A, epoch_B: epochs at which each model is sampled.

    Returns:
        (pvalue, tstat, sigma, MMDXY, MMDXZ) as returned by
        MMD_3_Sample_Test.
    """
    # make sure they used the same data
    # (context managers so the settings files are closed again)
    with open('./experiments/settings/' + identifier_A + '.txt', 'r') as settings_file:
        settings_A = json.load(settings_file)
    with open('./experiments/settings/' + identifier_B + '.txt', 'r') as settings_file:
        settings_B = json.load(settings_file)
    data_path = assert_same_data(settings_A, settings_B)
    # now load the data
    # The file stores a pickled dict, which NumPy >= 1.16.3 refuses to load
    # unless allow_pickle=True.
    # NOTE(review): unpickling executes arbitrary code -- only point this at
    # data files you created yourself.
    data = np.load(data_path + '.data.npy', allow_pickle=True).item()['samples']['vali']
    n_samples = data.shape[0]
    A_samples = model.sample_trained_model(settings_A, epoch_A, n_samples)
    B_samples = model.sample_trained_model(settings_B, epoch_B, n_samples)
    # do the comparison
    # TODO: support multiple signals
    ## some notes about this test:
    ## MMD_3_Sample_Test(X, Y, Z) tests the hypothesis that Px is closer to Pz than Py
    ## that is, test the null hypothesis H0:
    ##   MMD(F, Px, Py) <= MMD(F, Px, Pz)
    ## versus the alternate hypothesis:
    ##   MMD(F, Px, Py) > MMD(F, Px, Pz)
    ## at significance level that we select later (just the threshold on the p-value)
    pvalue, tstat, sigma, MMDXY, MMDXZ = MMD_3_Sample_Test(data[:, :, 0], A_samples[:, :, 0], B_samples[:, :, 0], computeMMDs=True)
    print(pvalue, tstat, sigma)
    if pvalue < 0.05:
        print('At confidence level 0.05, we reject the null hypothesis that MMDXY <= MMDXZ, and conclude that', identifier_B, 'has a smaller MMD with the true data than', identifier_A)
    else:
        print('We have failed to reject the null hypothesis that MMDXY <= MMDXZ, and cannot conclude that', identifier_B, 'has a smaller MMD with the true data than', identifier_A)
    return pvalue, tstat, sigma, MMDXY, MMDXZ
## random_samples = np.random.normal(size=generated.shape) # random_samples -= np.mean(random_samples, axis=0) # random_samples += np.mean(vali, axis=0) # random_samples /= np.std(random_samples, axis=0) # random_samples *= np.std(vali, axis=0) # get all the errors print('Getting reconstruction errors on train set') if train.shape[0] > max_samples: index_subset = np.random.permutation(train.shape[0])[:max_samples] train = train[index_subset] if train_labels is not None: train_labels = train_labels[index_subset] train_errors = error_per_sample(identifier, epoch, train, n_rep=5, g_tolerance=g_tolerance, C_samples=train_labels) print('Getting reconstruction errors on test set') if test.shape[0] > max_samples: index_subset = np.random.permutation(test.shape[0])[:max_samples] test = test[index_subset] if test_labels is not None: test_labels = test_labels[index_subset] test_errors = error_per_sample(identifier, epoch, test, n_rep=5, g_tolerance=g_tolerance, C_samples=test_labels) D_test, p_test = ks_2samp(train_errors, test_errors) print('KS statistic and p-value for train v. test erors:', D_test, p_test) pdb.set_trace() print('Getting reconstruction errors on generated set') generated_errors = error_per_sample(identifier, epoch, generated, n_rep=5, g_tolerance=g_tolerance, C_samples=synth_labels) D_gen, p_gen = ks_2samp(generated_errors, train_errors) print('KS statistic and p-value for train v. gen erors:', D_gen, p_gen) D_gentest, p_gentest = ks_2samp(generated_errors, test_errors) print('KS statistic and p-value for gen v. test erors:', D_gentest, p_gentest) # print('Getting reconstruction errors on noisy set') # alpha = 0.5 # noisy_samples = alpha*vali + (1-alpha)*np.random.permutation(vali) # noisy_errors = error_per_sample(identifier, epoch, noisy_samples, n_rep=5, g_tolerance=g_tolerance, C_samples=vali_labels) noisy_errors = None # save! 
errors = {'train': train_errors, 'test': test_errors, 'generated': generated_errors, 'noisy': noisy_errors} np.save('./experiments/eval/' + identifier + '_' + str(epoch) + '_' + str(g_tolerance) + '.reconstruction_errors.npy', errors) # do two-sample Kolomogorov-Smirnov test for equality D_test, p_test = ks_2samp(train_errors, test_errors) print('KS statistic and p-value for train v. test erors:', D_test, p_test) D_gen, p_gen = ks_2samp(generated_errors, train_errors) print('KS statistic and p-value for train v. gen erors:', D_gen, p_gen) D_gentest, p_gentest = ks_2samp(generated_errors, test_errors) print('KS statistic and p-value for gen v. test erors:', D_gentest, p_gentest) # visualise distribution of errors for train and test plotting.reconstruction_errors(identifier + '_' + str(epoch) + '_' + str(g_tolerance), train_errors, test_errors, generated_errors, noisy_errors) # visualise the "hardest" and "easiest" samples from train ranking_train = np.argsort(train_errors) easiest_train = ranking_train[:6] hardest_train = ranking_train[-6:] plotting.save_plot_sample(train[easiest_train], epoch, identifier + '_easytrain', n_samples=6, num_epochs=None, ncol=2) plotting.save_plot_sample(train[hardest_train], epoch, identifier + '_hardtrain', n_samples=6, num_epochs=None, ncol=2) # visualise the "hardest" and "easiest" samples from random # ranking_random = np.argsort(noisy_errors) # easiest_random = ranking_random[:6] # hardest_random = ranking_random[-6:] # plotting.save_plot_sample(random_samples[easiest_random], epoch, identifier + '_easyrandom', n_samples=6, num_epochs=None, ncol=2) # plotting.save_plot_sample(random_samples[hardest_random], epoch, identifier + '_hardrandom', n_samples=6, num_epochs=None, ncol=2) return True def error_per_sample(identifier, epoch, samples, n_rep=3, n_iter=None, g_tolerance=0.025, use_min=True, C_samples=None): """ Get (average over a few runs) of the reconstruction error per sample """ n_samples = samples.shape[0] heuristic_sigma = 
def view_interpolation(identifier, epoch, n_steps=6, input_samples=None, e_tolerance=0.01, sigma=3.29286853021):
    """
    If samples: generate interpolation between real points
    Else:
        Sample two points in the latent space, view a linear interpolation between them.

    Args:
        identifier: experiment identifier; settings are read from
            ./experiments/settings/<identifier>.txt.
        epoch: epoch of the trained model to use.
        n_steps: number of interpolation steps passed to plotting.interpolate.
        input_samples: two real samples to interpolate between; when None,
            two random training examples are drawn.
        e_tolerance: tolerance forwarded to model.invert.
        sigma: bandwidth for sample_distance; when None it is set to the
            median pairwise distance of the training data.

    Returns:
        True, after the interpolation plot has been saved.
    """
    with open('./experiments/settings/' + identifier + '.txt', 'r') as settings_file:
        settings = json.load(settings_file)

    # The training set is needed to draw default input samples AND to derive
    # sigma. Load it whenever either is missing, so that passing
    # input_samples together with sigma=None also works.
    train = None
    if input_samples is None or sigma is None:
        # The file stores a pickled dict, which NumPy >= 1.16.3 refuses to
        # load unless allow_pickle=True.
        # NOTE(review): unpickling executes arbitrary code -- only load data
        # files you created yourself.
        data = np.load('./experiments/data/' + identifier + '.data.npy', allow_pickle=True).item()
        train = data['samples']['train']
    if input_samples is None:
        # grab two training examples
        input_samples = np.random.permutation(train)[:2]
    if sigma is None:
        ## gotta get a sigma somehow
        sigma = mmd.median_pairwise_distance(train)
        print('Calcualted heuristic sigma from training data:', sigma)

    # Invert the two samples into latent space and interpolate between them.
    Zs, error, _ = model.invert(settings, epoch, input_samples, e_tolerance=e_tolerance)
    Z_sampleA, Z_sampleB = Zs
    Z_samples = plotting.interpolate(Z_sampleA, Z_sampleB, n_steps=n_steps)
    samples = model.sample_trained_model(settings, epoch, Z_samples.shape[0], Z_samples)

    # get distances from generated samples to target samples
    # (the two end points of the generated interpolation)
    d_A = [sample_distance(sample, samples[0], sigma) for sample in samples]
    d_B = [sample_distance(sample, samples[-1], sigma) for sample in samples]
    distances = pd.DataFrame({'dA': d_A, 'dB': d_B})
    plotting.save_plot_interpolate(input_samples, samples, epoch, settings['identifier'] + '_epoch' + str(epoch), distances=distances, sigma=sigma)
    return True
def sample_distance(sampleA, sampleB, sigma):
    """
    RBF-based dissimilarity between two samples: one minus the Gaussian
    kernel exp(-||A - B||^2 / (2 * sigma^2)). Identical samples give 0 and
    the value approaches 1 as the samples move apart.
    """
    inv_two_sigma_sq = 1 / (2 * sigma**2)
    squared_norm = np.linalg.norm(sampleA - sampleB)**2
    return 1 - np.exp(-inv_two_sigma_sq*squared_norm)
Load synthetic training, real test data, do multi-class SVM (basically just this: http://scikit-learn.org/stable/auto_examples/classification/plot_digits_classification.html) If reverse = True: do TRTS """ print('Running TSTR on', identifier, 'at epoch', epoch) if vali: test_set = 'vali' else: test_set = 'test' if generate: data = np.load('./experiments/data/' + identifier + '.data.npy').item() samples = data['samples'] train_X = samples['train'] test_X = samples[test_set] labels = data['labels'] train_Y = labels['train'] test_Y = labels[test_set] # now sample from the model synth_Y = np.tile(train_Y, [duplicate_synth, 1]) synth_X = model.sample_trained_model(identifier, epoch, num_samples=synth_Y.shape[0], C_samples=synth_Y) # for use in TRTS synth_testX = model.sample_trained_model(identifier, epoch, num_samples=test_Y.shape[0], C_samples=test_Y) synth_data = {'samples': synth_X, 'labels': synth_Y, 'test_samples': synth_testX, 'test_labels': test_Y} np.save('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy', synth_data) else: print('Loading synthetic data from pre-sampled model') exp_data = np.load('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy').item() test_X, test_Y = exp_data['test_data'], exp_data['test_labels'] train_X, train_Y = exp_data['train_data'], exp_data['train_labels'] synth_X, synth_Y = exp_data['synth_data'], exp_data['synth_labels'] if reverse: which_setting = 'trts' print('Swapping synthetic test set in for real, to do TRTS!') test_X = synth_testX else: print('Doing normal TSTR') which_setting = 'tstr' # make classifier if not CNN: model_choice = 'RF' # if multivariate, reshape if len(test_X.shape) == 3: test_X = test_X.reshape(test_X.shape[0], -1) if len(train_X.shape) == 3: train_X = train_X.reshape(train_X.shape[0], -1) if len(synth_X.shape) == 3: synth_X = synth_X.reshape(synth_X.shape[0], -1) # if one hot, fix if len(synth_Y.shape) > 1 and not synth_Y.shape[1] == 1: synth_Y = np.argmax(synth_Y, axis=1) 
train_Y = np.argmax(train_Y, axis=1) test_Y = np.argmax(test_Y, axis=1) # random forest #synth_classifier = SVC(gamma=0.001) #real_classifier = SVC(gamma=0.001) synth_classifier = RandomForestClassifier(n_estimators=500) real_classifier = RandomForestClassifier(n_estimators=500) # fit real_classifier.fit(train_X, train_Y) synth_classifier.fit(synth_X, synth_Y) # test on real synth_predY = synth_classifier.predict(test_X) real_predY = real_classifier.predict(test_X) else: model_choice = 'CNN' synth_predY = train_CNN(synth_X, synth_Y, samples['vali'], labels['vali'], test_X) clear_session() real_predY = train_CNN(train_X, train_Y, samples['vali'], labels['vali'], test_X) clear_session() # CNN setting is all 'one-hot' test_Y = np.argmax(test_Y, axis=1) synth_predY = np.argmax(synth_predY, axis=1) real_predY = np.argmax(real_predY, axis=1) # report on results synth_prec, synth_recall, synth_f1, synth_support = precision_recall_fscore_support(test_Y, synth_predY, average='weighted') synth_accuracy = accuracy_score(test_Y, synth_predY) synth_auprc = 'NaN' synth_auroc = 'NaN' synth_scores = [synth_prec, synth_recall, synth_f1, synth_accuracy, synth_auprc, synth_auroc] real_prec, real_recall, real_f1, real_support = precision_recall_fscore_support(test_Y, real_predY, average='weighted') real_accuracy = accuracy_score(test_Y, real_predY) real_auprc = 'NaN' real_auroc = 'NaN' real_scores = [real_prec, real_recall, real_f1, real_accuracy, real_auprc, real_auroc] all_scores = synth_scores + real_scores if vali: report_file = open('./experiments/tstr/vali.' 
+ which_setting + '_report.v3.csv', 'a') report_file.write('mnist,' + identifier + ',' + model_choice + ',' + str(epoch) + ',' + ','.join(map(str, all_scores)) + '\n') report_file.close() else: report_file = open('./experiments/tstr/' + which_setting + '_report.v3.csv', 'a') report_file.write('mnist,' + identifier + ',' + model_choice + ',' + str(epoch) + ',' + ','.join(map(str, all_scores)) + '\n') report_file.close() # visualise results try: plotting.view_mnist_eval(identifier + '_' + str(epoch), train_X, train_Y, synth_X, synth_Y, test_X, test_Y, synth_predY, real_predY) except ValueError: print('PLOTTING ERROR') pdb.set_trace() print(classification_report(test_Y, synth_predY)) print(classification_report(test_Y, real_predY)) return synth_f1, real_f1 def TSTR_eICU(identifier, epoch, generate=True, vali=True, CNN=False, do_OR=False, duplicate_synth=1, reverse=False): """ """ if vali: test_set = 'vali' else: test_set = 'test' data = np.load('./experiments/data/' + identifier + '.data.npy').item() samples = data['samples'] train_X = samples['train'] test_X = samples[test_set] labels = data['labels'] train_Y = labels['train'] test_Y = labels[test_set] if generate: # now sample from the model synth_Y = np.tile(train_Y, [duplicate_synth, 1]) synth_X = model.sample_trained_model(identifier, epoch, num_samples=synth_Y.shape[0], C_samples=synth_Y) # for use in TRTS synth_testX = model.sample_trained_model(identifier, epoch, num_samples=test_Y.shape[0], C_samples=test_Y) synth_data = {'samples': synth_X, 'labels': synth_Y, 'test_samples': synth_testX, 'test_labels': test_Y} np.save('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy', synth_data) else: print('Loading pre-generated data') print('WARNING: not implemented for TRTS') # get "train" data exp_data = np.load('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy').item() synth_X = exp_data['samples'] synth_Y = exp_data['labels'] n_synth = synth_X.shape[0] synth_X = 
synth_X.reshape(n_synth, -1) # pdb.set_trace() # # ALERT ALERT MODIFYING # synth_X = 2*(synth_X > 0) - 1 orig_data = np.load('/cluster/home/hyland/eICU_task_data.npy').item() if reverse: which_setting = 'trts' # visualise distribution of errors for train and test print('Swapping synthetic test set in for real, to do TRTS!') test_X = synth_testX else: print('Doing normal TSTR') which_setting = 'tstr' # # get test data # test_X = data['test_X'] # test_Y = data['test_Y'] if not CNN: model_choice = 'RF' # if multivariate, reshape if len(test_X.shape) == 3: test_X = test_X.reshape(test_X.shape[0], -1) if len(train_X.shape) == 3: train_X = train_X.reshape(train_X.shape[0], -1) if len(synth_X.shape) == 3: synth_X = synth_X.reshape(synth_X.shape[0], -1) else: raise ValueError(CNN) model_choice = 'CNN' # we will select the best validation set epoch based on F1 score, take average across all the tasks score_list = [] for label in range(synth_Y.shape[1]): task = orig_data['Y_columns'][label] if vali: if not task in ['low_sao2', 'high_heartrate', 'low_respiration']: print('Skipping task', task, 'because validation evaluation.') continue print('Evaluating on task:', task) #print('(', np.mean(synth_Y[:, label]), 'positive in train, ', np.mean(test_Y[:, label]), 'in test)') #m = RandomForestClassifier(n_estimators=50).fit(synth_X, synth_Y[:, label]) #m = SVC(gamma=0.001).fit(synth_X, synth_Y[:, label]) synth_classifier = RandomForestClassifier(n_estimators=100).fit(synth_X, synth_Y[:, label]) synth_predY = synth_classifier.predict(test_X) synth_predY_prob = synth_classifier.predict_proba(test_X)[:, 1] real_classifier = RandomForestClassifier(n_estimators=100).fit(train_X, train_Y[:, label]) real_predY = real_classifier.predict(test_X) real_predY_prob = real_classifier.predict_proba(test_X)[:, 1] #print('(predicted', np.mean(predict), 'positive labels)') synth_prec, synth_recall, synth_f1, synth_support = precision_recall_fscore_support(test_Y[:, label], synth_predY, 
average='weighted') synth_accuracy = accuracy_score(test_Y[:, label], synth_predY) synth_auprc = average_precision_score(test_Y[:, label], synth_predY_prob) synth_auroc = roc_auc_score(test_Y[:, label], synth_predY_prob) synth_scores = [synth_prec, synth_recall, synth_f1, synth_accuracy, synth_auprc, synth_auroc] real_prec, real_recall, real_f1, real_support = precision_recall_fscore_support(test_Y[:, label], real_predY, average='weighted') real_accuracy = accuracy_score(test_Y[:, label], real_predY) real_auprc = average_precision_score(test_Y[:, label], real_predY_prob) real_auroc = roc_auc_score(test_Y[:, label], real_predY_prob) real_scores = [real_prec, real_recall, real_f1, real_accuracy, real_auprc, real_auroc] all_scores = synth_scores + real_scores if vali: report_file = open('./experiments/tstr/vali.' + which_setting + '_report.v3.csv', 'a') report_file.write('eICU_' + task + ',' + identifier + ',' + model_choice + ',' + str(epoch) + ',' + ','.join(map(str, all_scores)) + '\n') report_file.close() else: report_file = open('./experiments/tstr/' + which_setting + '_report.v3.csv', 'a') report_file.write('eICU_' + task + ',' + identifier + ',' + model_choice + ',' + str(epoch) + ',' + ','.join(map(str, all_scores)) + '\n') report_file.close() print(classification_report(test_Y[:, label], synth_predY)) print(classification_report(test_Y[:, label], real_predY)) if task in ['low_sao2', 'high_heartrate', 'low_respiration']: score_list.append(synth_auprc + synth_auroc) if do_OR: raise NotImplementedError # do the OR task extreme_heartrate_test = test_Y[:, 1] + test_Y[:, 4] extreme_respiration_test = test_Y[:, 2] + test_Y[:, 5] extreme_systemicmean_test = test_Y[:, 3] + test_Y[:, 6] Y_OR_test = np.vstack([extreme_heartrate_test, extreme_respiration_test, extreme_systemicmean_test]).T Y_OR_test = (Y_OR_test > 0)*1 extreme_heartrate_synth = synth_Y[:, 1] + synth_Y[:, 4] extreme_respiration_synth = synth_Y[:, 2] + synth_Y[:, 5] extreme_systemicmean_synth = synth_Y[:, 
def NIPS_toy_plot(identifier_rbf, epoch_rbf, identifier_sine, epoch_sine, identifier_mnist, epoch_mnist):
    """
    Plot real training examples next to generated examples, one plot per
    sample, so the results can be merged into a figure afterwards.

    Only the MNIST experiment is plotted at the moment: for every sample
    plotting.nips_plot_mnist is called with the sample, a running index and
    the label 'train' or 'GAN'. The rbf and sine plots are disabled, so the
    rbf/sine arguments are accepted for backward compatibility but not used.
    To bring them back, load and sample those experiments the same way and
    call plotting.nips_plot_rbf / plotting.nips_plot_sine.

    Args:
        identifier_rbf, epoch_rbf: unused while the rbf plots are disabled.
        identifier_sine, epoch_sine: unused while the sine plots are disabled.
        identifier_mnist: name of the MNIST experiment; selects its settings
            file and its data file under ./experiments/.
        epoch_mnist: epoch of the trained model to sample from.

    Returns:
        True
    """
    n_samples = 15
    # settings (closed explicitly instead of leaking the file handle)
    with open('./experiments/settings/' + identifier_mnist + '.txt', 'r') as settings_file:
        settings_mnist = json.load(settings_file)
    # data: a pickled dict, so allow_pickle is required on NumPy >= 1.16.3
    data_mnist = np.load('./experiments/data/' + identifier_mnist + '.data.npy', allow_pickle=True).item()
    train_mnist = data_mnist['samples']['train']
    # sample
    samples_mnist = model.sample_trained_model(settings_mnist, epoch_mnist, n_samples)
    # plot them all; the index keeps counting across real and generated
    # samples, exactly as before
    real_subset = np.random.permutation(train_mnist)[:n_samples]
    labelled = [(sample, 'train') for sample in real_subset]
    labelled += [(sample, 'GAN') for sample in samples_mnist]
    for index, (sample, source) in enumerate(labelled):
        plotting.nips_plot_mnist(sample, index, source)
    return True
def _mix_rbf_kernel(X, Y, sigmas, wts=None):
    """
    Build the kernel matrices of a weighted mixture of Gaussian RBF kernels.

    Args:
        X, Y: rank-2 or rank-3 tensors. For rank 3 the inner product between
            two samples is taken over both trailing axes (Frobenius).
        sigmas: RBF bandwidths, either a Python list/tuple (as passed by
            rbf_mmd2 and the default of mix_rbf_mmd2) or a rank-1 tensor
            whose length is statically known.
        wts: one weight per bandwidth; defaults to 1.0 for each.

    Returns:
        (K_XX, K_XY, K_YY, tf.reduce_sum(wts)); the last entry is the value
        every kernel matrix has on its diagonal.

    Raises:
        ValueError: if X is neither rank 2 nor rank 3.
    """
    # A list/tuple has no get_shape(), which is what the previous code
    # called; use its entries directly and only unstack real tensors.
    if isinstance(sigmas, (list, tuple)):
        sigma_list = list(sigmas)
    else:
        sigma_list = tf.unstack(sigmas, axis=0)
    if wts is None:
        wts = [1.0] * len(sigma_list)

    rank = len(X.shape)
    if rank == 2:
        # matrix
        XX = tf.matmul(X, X, transpose_b=True)
        XY = tf.matmul(X, Y, transpose_b=True)
        YY = tf.matmul(Y, Y, transpose_b=True)
    elif rank == 3:
        # tensor -- this is computing the Frobenius norm
        XX = tf.tensordot(X, X, axes=[[1, 2], [1, 2]])
        XY = tf.tensordot(X, Y, axes=[[1, 2], [1, 2]])
        YY = tf.tensordot(Y, Y, axes=[[1, 2], [1, 2]])
    else:
        raise ValueError(X)

    X_sqnorms = tf.diag_part(XX)
    Y_sqnorms = tf.diag_part(YY)

    def r(x):
        # as a row vector
        return tf.expand_dims(x, 0)

    def c(x):
        # as a column vector
        return tf.expand_dims(x, 1)

    K_XX, K_XY, K_YY = 0, 0, 0
    for sigma, wt in zip(sigma_list, wts):
        gamma = 1 / (2 * sigma**2)
        # ||a - b||^2 = ||a||^2 - 2 <a, b> + ||b||^2
        K_XX += wt * tf.exp(-gamma * (-2 * XX + c(X_sqnorms) + r(X_sqnorms)))
        K_XY += wt * tf.exp(-gamma * (-2 * XY + c(X_sqnorms) + r(Y_sqnorms)))
        K_YY += wt * tf.exp(-gamma * (-2 * YY + c(Y_sqnorms) + r(Y_sqnorms)))

    return K_XX, K_XY, K_YY, tf.reduce_sum(wts)
def _mmd2_and_ratio(K_XX, K_XY, K_YY, const_diagonal=False, biased=False,
                    min_var_est=_eps):
    """
    Return the MMD^2 estimate and its ratio to the estimated standard deviation.

    The variance estimate from _mmd2_and_variance is floored at min_var_est
    before the square root is taken.
    """
    estimate, variance = _mmd2_and_variance(
        K_XX, K_XY, K_YY, const_diagonal=const_diagonal, biased=biased)
    floored_variance = tf.maximum(variance, min_var_est)
    return estimate, estimate / tf.sqrt(floored_variance)
def median_pairwise_distance(X, Y=None):
    """
    Heuristic for bandwidth of the RBF: the median of the pairwise Euclidean
    distances between the samples of X and the samples of Y.

    If Y is missing, just calculate it from X: this is so that, during
    training, as Y changes, we can use a fixed bandwidth (and save
    recalculating this each time we evaluated the mmd).
    Note: most of this code is assuming tensorflow, but X and Y are just ndarrays.

    Args:
        X: array of rank 2 (samples x features) or rank 3
            (samples x a x b; distances are Frobenius distances).
        Y: array of the same rank as X, or None to use X itself.

    Returns:
        The median over all entries of the (len(X), len(Y)) distance matrix.

    Raises:
        ValueError: if X is neither rank 2 nor rank 3.
    """
    X = np.asarray(X)
    Y = X if Y is None else np.asarray(Y)
    # this is horrendously inefficient, sorry
    rank = len(X.shape)
    if rank == 2:
        # matrix
        X_sqnorms = np.einsum('...i,...i', X, X)
        Y_sqnorms = np.einsum('...i,...i', Y, Y)
        XY = np.einsum('ia,ja', X, Y)
    elif rank == 3:
        # tensor -- this is computing the Frobenius norm
        X_sqnorms = np.einsum('...ij,...ij', X, X)
        Y_sqnorms = np.einsum('...ij,...ij', Y, Y)
        XY = np.einsum('iab,jab', X, Y)
    else:
        raise ValueError(X)
    sq_distances = X_sqnorms.reshape(-1, 1) - 2 * XY + Y_sqnorms.reshape(1, -1)
    # Round-off can push the squared distance of (near-)identical samples a
    # hair below zero; without the clamp sqrt returns NaN and the median of
    # the whole matrix becomes NaN. NaNs already present in the data still
    # propagate.
    distances = np.sqrt(np.maximum(sq_distances, 0))
    return np.median(distances)
Note: most of this code is assuming tensorflow, but X and Y are just ndarrays """ if Y is None: Y = X # this is horrendously inefficient, sorry if len(X.shape) == 2: # matrix X_sqnorms = np.einsum('...i,...i', X, X) Y_sqnorms = np.einsum('...i,...i', Y, Y) XY = np.einsum('ia,ja', X, Y) elif len(X.shape) == 3: # tensor -- this is computing the Frobenius norm X_sqnorms = np.einsum('...ij,...ij', X, X) # reduce the tensor shape Y_sqnorms = np.einsum('...ij,...ij', Y, Y) XY = np.einsum('iab,jab', X, Y) # X*Y^T?? else: raise ValueError(X) distances = np.sqrt(X_sqnorms.reshape(-1, 1) - 2 * XY + Y_sqnorms.reshape(1, -1)) distances = distances.reshape(-1, 1) distances = distances[~np.isnan(distances)] return np.median(distances) ================================================ FILE: mod_core_rnn_cell_impl.py ================================================ # Copyright 2015 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
@contextlib.contextmanager
def _checked_scope(cell, scope, reuse=None, **kwargs):
  """Open `scope` for `cell`, refusing accidental weight sharing.

  Args:
    cell: the RNN cell entering the scope; its `_scope` attribute is read
      and, on success, overwritten.
    scope: variable scope (or scope name) handed to `vs.variable_scope`.
    reuse: forwarded to `vs.variable_scope` only when it is not None.
    **kwargs: further keyword arguments for `vs.variable_scope`.

  Yields:
    The opened variable scope.

  Raises:
    ValueError: if the cell was used before under a scope with a different
      name, or if the scope already holds a variable named
      `_WEIGHTS_VARIABLE_NAME` and `reuse` is None.
  """
  if reuse is not None:
    kwargs["reuse"] = reuse
  with vs.variable_scope(scope, **kwargs) as checking_scope:
    scope_name = checking_scope.name
    if hasattr(cell, "_scope"):
      # The cell has been called before: it must stay in the same scope.
      cell_scope = cell._scope  # pylint: disable=protected-access
      if cell_scope.name != checking_scope.name:
        raise ValueError(
            "Attempt to reuse RNNCell %s with a different variable scope than "
            "its first use. First use of cell was with scope '%s', this "
            "attempt is with scope '%s'. Please create a new instance of the "
            "cell if you would like it to use a different set of weights. "
            "If before you were using: MultiRNNCell([%s(...)] * num_layers), "
            "change to: MultiRNNCell([%s(...) for _ in range(num_layers)]). "
            "If before you were using the same cell instance as both the "
            "forward and reverse cell of a bidirectional RNN, simply create "
            "two instances (one for forward, one for reverse). "
            "In May 2017, we will start transitioning this cell's behavior "
            "to use existing stored weights, if any, when it is called "
            "with scope=None (which can lead to silent model degradation, so "
            "this error will remain until then.)"
            % (cell, cell_scope.name, scope_name, type(cell).__name__,
               type(cell).__name__))
    else:
      # First call of this cell: probe the scope for weights that are
      # already there. get_variable under reuse=True raises ValueError when
      # the variable does not exist, which is the "nothing found" case.
      weights_found = False
      try:
        with vs.variable_scope(checking_scope, reuse=True):
          vs.get_variable(_WEIGHTS_VARIABLE_NAME)
        weights_found = True
      except ValueError:
        pass
      if weights_found and reuse is None:
        raise ValueError(
            "Attempt to have a second RNNCell use the weights of a variable "
            "scope that already has weights: '%s'; and the cell was not "
            "constructed as %s(..., reuse=True). "
            "To share the weights of an RNNCell, simply "
            "reuse it in your second calculation, or create a new one with "
            "the argument reuse=True." % (scope_name, type(cell).__name__))

    # Everything is OK. Update the cell's scope and yield it.
    cell._scope = checking_scope  # pylint: disable=protected-access
    yield checking_scope
_LSTMStateTuple = collections.namedtuple("LSTMStateTuple", ("c", "h"))


class LSTMStateTuple(_LSTMStateTuple):
  """Two-field tuple `(c, h)` that LSTM cells use for their state.

  It serves as `state_size`, as `zero_state` and as the state a cell returns,
  and is only used when `state_is_tuple=True`. The fields are stored in the
  order `c`, then `h`.
  """
  __slots__ = ()

  @property
  def dtype(self):
    """The dtype shared by `c` and `h`; TypeError if the two disagree."""
    cell_state, hidden_state = self
    if cell_state.dtype == hidden_state.dtype:
      return cell_state.dtype
    raise TypeError("Inconsistent internal state: %s vs %s" %
                    (str(cell_state.dtype), str(hidden_state.dtype)))
""" if not state_is_tuple: logging.warn("%s: Using a concatenated state is slower and will soon be " "deprecated. Use state_is_tuple=True.", self) if input_size is not None: logging.warn("%s: The input_size parameter is deprecated.", self) self._num_units = num_units self._forget_bias = forget_bias self._state_is_tuple = state_is_tuple self._activation = activation self._reuse = reuse @property def state_size(self): return (LSTMStateTuple(self._num_units, self._num_units) if self._state_is_tuple else 2 * self._num_units) @property def output_size(self): return self._num_units def __call__(self, inputs, state, scope=None): """Long short-term memory cell (LSTM).""" with _checked_scope(self, scope or "basic_lstm_cell", reuse=self._reuse): # Parameters of gates are concatenated into one multiply for efficiency. if self._state_is_tuple: c, h = state else: c, h = array_ops.split(value=state, num_or_size_splits=2, axis=1) concat = _linear([inputs, h], 4 * self._num_units, True) # i = input_gate, j = new_input, f = forget_gate, o = output_gate i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4, axis=1) new_c = (c * sigmoid(f + self._forget_bias) + sigmoid(i) * self._activation(j)) new_h = self._activation(new_c) * sigmoid(o) if self._state_is_tuple: new_state = LSTMStateTuple(new_c, new_h) else: new_state = array_ops.concat([new_c, new_h], 1) return new_h, new_state class LSTMCell(RNNCell): """Long short-term memory unit (LSTM) recurrent network cell. The default non-peephole implementation is based on: http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf S. Hochreiter and J. Schmidhuber. "Long Short-Term Memory". Neural Computation, 9(8):1735-1780, 1997. The peephole implementation is based on: https://research.google.com/pubs/archive/43905.pdf Hasim Sak, Andrew Senior, and Francoise Beaufays. "Long short-term memory recurrent neural network architectures for large scale acoustic modeling." INTERSPEECH, 2014. 
The class uses optional peep-hole connections, optional cell clipping, and an optional projection layer. """ def __init__(self, num_units, input_size=None, use_peepholes=False, cell_clip=None, initializer=None, bias_start=0.0, num_proj=None, proj_clip=None, num_unit_shards=None, num_proj_shards=None, forget_bias=1.0, state_is_tuple=True, activation=tanh, reuse=None): """Initialize the parameters for an LSTM cell. Args: num_units: int, The number of units in the LSTM cell input_size: Deprecated and unused. use_peepholes: bool, set True to enable diagonal/peephole connections. cell_clip: (optional) A float value, if provided the cell state is clipped by this value prior to the cell output activation. initializer: (optional) The initializer to use for the weight and projection matrices. bias_start: (optional) The VALUE to initialize the bias to, in the linear call num_proj: (optional) int, The output dimensionality for the projection matrices. If None, no projection is performed. proj_clip: (optional) A float value. If `num_proj > 0` and `proj_clip` is provided, then the projected values are clipped elementwise to within `[-proj_clip, proj_clip]`. num_unit_shards: Deprecated, will be removed by Jan. 2017. Use a variable_scope partitioner instead. num_proj_shards: Deprecated, will be removed by Jan. 2017. Use a variable_scope partitioner instead. forget_bias: Biases of the forget gate are initialized by default to 1 in order to reduce the scale of forgetting at the beginning of the training. state_is_tuple: If True, accepted and returned states are 2-tuples of the `c_state` and `m_state`. If False, they are concatenated along the column axis. This latter behavior will soon be deprecated. activation: Activation function of the inner states. reuse: (optional) Python boolean describing whether to reuse variables in an existing scope. If not `True`, and the existing scope already has the given variables, an error is raised. 
""" if not state_is_tuple: logging.warn("%s: Using a concatenated state is slower and will soon be " "deprecated. Use state_is_tuple=True.", self) if input_size is not None: logging.warn("%s: The input_size parameter is deprecated.", self) if num_unit_shards is not None or num_proj_shards is not None: logging.warn( "%s: The num_unit_shards and proj_unit_shards parameters are " "deprecated and will be removed in Jan 2017. " "Use a variable scope with a partitioner instead.", self) self._num_units = num_units self._use_peepholes = use_peepholes self._cell_clip = cell_clip self._initializer = initializer self._bias_start = bias_start self._num_proj = num_proj self._proj_clip = proj_clip self._num_unit_shards = num_unit_shards self._num_proj_shards = num_proj_shards self._forget_bias = forget_bias self._state_is_tuple = state_is_tuple self._activation = activation self._reuse = reuse if num_proj: self._state_size = ( LSTMStateTuple(num_units, num_proj) if state_is_tuple else num_units + num_proj) self._output_size = num_proj else: self._state_size = ( LSTMStateTuple(num_units, num_units) if state_is_tuple else 2 * num_units) self._output_size = num_units @property def state_size(self): return self._state_size @property def output_size(self): return self._output_size def __call__(self, inputs, state, scope=None): """Run one step of LSTM. Args: inputs: input Tensor, 2D, batch x num_units. state: if `state_is_tuple` is False, this must be a state Tensor, `2-D, batch x state_size`. If `state_is_tuple` is True, this must be a tuple of state Tensors, both `2-D`, with column sizes `c_state` and `m_state`. scope: VariableScope for the created subgraph; defaults to "lstm_cell". Returns: A tuple containing: - A `2-D, [batch x output_dim]`, Tensor representing the output of the LSTM after reading `inputs` when previous state was `state`. Here output_dim is: num_proj if num_proj was set, num_units otherwise. 
- Tensor(s) representing the new state of LSTM after reading `inputs` when the previous state was `state`. Same type and shape(s) as `state`. Raises: ValueError: If input size cannot be inferred from inputs via static shape inference. """ num_proj = self._num_units if self._num_proj is None else self._num_proj if self._state_is_tuple: (c_prev, m_prev) = state else: c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units]) m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj]) dtype = inputs.dtype input_size = inputs.get_shape().with_rank(2)[1] if input_size.value is None: raise ValueError("Could not infer input size from inputs.get_shape()[-1]") with _checked_scope(self, scope or "lstm_cell", initializer=self._initializer, reuse=self._reuse) as unit_scope: if self._num_unit_shards is not None: unit_scope.set_partitioner( partitioned_variables.fixed_size_partitioner( self._num_unit_shards)) # i = input_gate, j = new_input, f = forget_gate, o = output_gate lstm_matrix = _linear([inputs, m_prev], 4 * self._num_units, bias=True, bias_start=self._bias_start) i, j, f, o = array_ops.split( value=lstm_matrix, num_or_size_splits=4, axis=1) # Diagonal connections if self._use_peepholes: with vs.variable_scope(unit_scope) as projection_scope: if self._num_unit_shards is not None: projection_scope.set_partitioner(None) w_f_diag = vs.get_variable( "w_f_diag", shape=[self._num_units], dtype=dtype) w_i_diag = vs.get_variable( "w_i_diag", shape=[self._num_units], dtype=dtype) w_o_diag = vs.get_variable( "w_o_diag", shape=[self._num_units], dtype=dtype) if self._use_peepholes: c = (sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev + sigmoid(i + w_i_diag * c_prev) * self._activation(j)) else: c = (sigmoid(f + self._forget_bias) * c_prev + sigmoid(i) * self._activation(j)) if self._cell_clip is not None: # pylint: disable=invalid-unary-operand-type c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip) # pylint: 
class OutputProjectionWrapper(RNNCell):
  """RNNCell wrapper that applies a linear projection to the cell output.

  Note: it is often cheaper to skip this wrapper, concatenate all outputs of
  the sequence in time, project that batch-concatenated tensor once and then
  split it (or feed it directly into a softmax).
  """

  def __init__(self, cell, output_size, reuse=None):
    """Wrap `cell` so that its output is projected to `output_size`.

    Args:
      cell: an RNNCell whose output gets projected.
      output_size: integer, size of the output after projection.
      reuse: (optional) Python boolean describing whether to reuse variables
        in an existing scope.

    Raises:
      TypeError: if cell is not an RNNCell.
      ValueError: if output_size is not positive.
    """
    cell_is_valid = isinstance(cell, RNNCell)
    if not cell_is_valid:
      raise TypeError("The parameter cell is not RNNCell.")
    if output_size < 1:
      message = "Parameter output_size must be > 0: %d." % output_size
      raise ValueError(message)
    self._reuse = reuse
    self._output_size = output_size
    self._cell = cell

  @property
  def state_size(self):
    # The projection only touches the output; state is the wrapped cell's.
    wrapped = self._cell
    return wrapped.state_size

  @property
  def output_size(self):
    return self._output_size

  def zero_state(self, batch_size, dtype):
    scope_name = "%sZeroState" % type(self).__name__
    with ops.name_scope(scope_name, values=[batch_size]):
      initial_state = self._cell.zero_state(batch_size, dtype)
    return initial_state

  def __call__(self, inputs, state, scope=None):
    """Run the wrapped cell, then project its output.

    Returns:
      A pair `(projected_output, new_state)`; the state is returned exactly
      as produced by the wrapped cell.
    """
    cell_output, next_state = self._cell(inputs, state)
    # Default scope: "OutputProjectionWrapper"
    wrapper_scope = scope or "output_projection_wrapper"
    with _checked_scope(self, wrapper_scope, reuse=self._reuse):
      projection = _linear(cell_output, self._output_size, True)
    return projection, next_state
""" if input_size is not None: logging.warn("%s: The input_size parameter is deprecated.", self) if not isinstance(cell, RNNCell): raise TypeError("The parameter cell is not RNNCell.") self._cell = cell self._num_proj = num_proj @property def state_size(self): return self._cell.state_size @property def output_size(self): return self._cell.output_size def zero_state(self, batch_size, dtype): with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]): return self._cell.zero_state(batch_size, dtype) def __call__(self, inputs, state, scope=None): """Run the input projection and then the cell.""" # Default scope: "InputProjectionWrapper" with vs.variable_scope(scope or "input_projection_wrapper"): projected = _linear(inputs, self._num_proj, True) return self._cell(projected, state) def _enumerated_map_structure(map_fn, *args, **kwargs): ix = [0] def enumerated_fn(*inner_args, **inner_kwargs): r = map_fn(ix[0], *inner_args, **inner_kwargs) ix[0] += 1 return r return nest.map_structure(enumerated_fn, *args, **kwargs) class DropoutWrapper(RNNCell): """Operator adding dropout to inputs and outputs of the given cell.""" def __init__(self, cell, input_keep_prob=1.0, output_keep_prob=1.0, state_keep_prob=1.0, variational_recurrent=False, input_size=None, dtype=None, seed=None): """Create a cell with added input, state, and/or output dropout. If `variational_recurrent` is set to `True` (**NOT** the default behavior), then the the same dropout mask is applied at every step, as described in: Y. Gal, Z Ghahramani. "A Theoretically Grounded Application of Dropout in Recurrent Neural Networks". https://arxiv.org/abs/1512.05287 Otherwise a different dropout mask is applied at every time step. Args: cell: an RNNCell, a projection to output_size is added to it. input_keep_prob: unit Tensor or float between 0 and 1, input keep probability; if it is constant and 1, no input dropout will be added. 
output_keep_prob: unit Tensor or float between 0 and 1, output keep probability; if it is constant and 1, no output dropout will be added. state_keep_prob: unit Tensor or float between 0 and 1, output keep probability; if it is constant and 1, no output dropout will be added. State dropout is performed on the *output* states of the cell. variational_recurrent: Python bool. If `True`, then the same dropout pattern is applied across all time steps per run call. If this parameter is set, `input_size` **must** be provided. input_size: (optional) (possibly nested tuple of) `TensorShape` objects containing the depth(s) of the input tensors expected to be passed in to the `DropoutWrapper`. Required and used **iff** `variational_recurrent = True` and `input_keep_prob < 1`. dtype: (optional) The `dtype` of the input, state, and output tensors. Required and used **iff** `variational_recurrent = True`. seed: (optional) integer, the randomness seed. Raises: TypeError: if cell is not an RNNCell. ValueError: if any of the keep_probs are not between 0 and 1. 
""" if not isinstance(cell, RNNCell): raise TypeError("The parameter cell is not a RNNCell.") with ops.name_scope("DropoutWrapperInit"): def tensor_and_const_value(v): tensor_value = ops.convert_to_tensor(v) const_value = tensor_util.constant_value(tensor_value) return (tensor_value, const_value) for prob, attr in [(input_keep_prob, "input_keep_prob"), (state_keep_prob, "state_keep_prob"), (output_keep_prob, "output_keep_prob")]: tensor_prob, const_prob = tensor_and_const_value(prob) if const_prob is not None: if const_prob < 0 or const_prob > 1: raise ValueError("Parameter %s must be between 0 and 1: %d" % (attr, const_prob)) setattr(self, "_%s" % attr, float(const_prob)) else: setattr(self, "_%s" % attr, tensor_prob) # Set cell, variational_recurrent, seed before running the code below self._cell = cell self._variational_recurrent = variational_recurrent self._seed = seed self._recurrent_input_noise = None self._recurrent_state_noise = None self._recurrent_output_noise = None if variational_recurrent: if dtype is None: raise ValueError( "When variational_recurrent=True, dtype must be provided") def convert_to_batch_shape(s): # Prepend a 1 for the batch dimension; for recurrent # variational dropout we use the same dropout mask for all # batch elements. 
def _gen_seed(self, salt_prefix, index):
  """Derive a seed for one dropout tensor from the wrapper-level seed.

  Args:
    salt_prefix: string naming the dropout site (the callers in this class
      pass "input", "state" or "output").
    index: integer position of the tensor inside its nested structure.

  Returns:
    None when the wrapper was built without a seed; otherwise a
    non-negative integer taken from the first 8 hex digits of the MD5 of
    `str(seed) + "<salt_prefix>_<index>"`, masked to 31 bits.
  """
  base_seed = self._seed
  if base_seed is None:
    return None
  salted_text = "".join([str(base_seed), "%s_%d" % (salt_prefix, index)])
  digest = hashlib.md5(salted_text.encode("utf-8")).hexdigest()
  leading_bits = int(digest[:8], 16)
  return leading_bits & 0x7FFFFFFF
def __call__(self, inputs, state, scope=None):
  """Run the wrapped cell with input, state and output dropout as configured.

  Args:
    inputs: cell inputs.
    state: cell state.
    scope: optional scope, passed positionally to the wrapped cell.

  Returns:
    A pair `(output, new_state)` from the wrapped cell, each passed through
    dropout when its keep probability requires it.
  """
  def _needs_dropout(keep_prob):
    # Only a Python float can be compared here; any other value (e.g. a
    # tensor stored by the constructor) always goes through dropout.
    if isinstance(keep_prob, float):
      return keep_prob < 1
    return True

  if _needs_dropout(self._input_keep_prob):
    inputs = self._dropout(inputs, "input",
                           self._recurrent_input_noise,
                           self._input_keep_prob)

  cell_output, next_state = self._cell(inputs, state, scope)

  if _needs_dropout(self._state_keep_prob):
    next_state = self._dropout(next_state, "state",
                               self._recurrent_state_noise,
                               self._state_keep_prob)
  if _needs_dropout(self._output_keep_prob):
    cell_output = self._dropout(cell_output, "output",
                                self._recurrent_output_noise,
                                self._output_keep_prob)
  return cell_output, next_state
class DeviceWrapper(RNNCell):
  """RNNCell wrapper that runs the wrapped cell under a given device."""

  def __init__(self, cell, device):
    """Remember `cell` and the `device` it is called under.

    The wrapped `cell` is invoked inside `tf.device(device)`.

    Args:
      cell: An instance of `RNNCell`.
      device: A device string or function, for passing to `tf.device`.
    """
    self._device = device
    self._cell = cell

  @property
  def state_size(self):
    wrapped = self._cell
    return wrapped.state_size

  @property
  def output_size(self):
    wrapped = self._cell
    return wrapped.output_size

  def zero_state(self, batch_size, dtype):
    scope_name = "%sZeroState" % type(self).__name__
    with ops.name_scope(scope_name, values=[batch_size]):
      initial_state = self._cell.zero_state(batch_size, dtype)
    return initial_state

  def __call__(self, inputs, state, scope=None):
    """Call the wrapped cell inside the configured device context."""
    with ops.device(self._device):
      outputs_and_state = self._cell(inputs, state, scope=scope)
    return outputs_and_state
Args: cell: an RNNCell, an embedding will be put before its inputs. embedding_classes: integer, how many symbols will be embedded. embedding_size: integer, the size of the vectors we embed into. initializer: an initializer to use when creating the embedding; if None, the initializer from variable scope or a default one is used. reuse: (optional) Python boolean describing whether to reuse variables in an existing scope. If not `True`, and the existing scope already has the given variables, an error is raised. Raises: TypeError: if cell is not an RNNCell. ValueError: if embedding_classes is not positive. """ if not isinstance(cell, RNNCell): raise TypeError("The parameter cell is not RNNCell.") if embedding_classes <= 0 or embedding_size <= 0: raise ValueError("Both embedding_classes and embedding_size must be > 0: " "%d, %d." % (embedding_classes, embedding_size)) self._cell = cell self._embedding_classes = embedding_classes self._embedding_size = embedding_size self._initializer = initializer self._reuse = reuse @property def state_size(self): return self._cell.state_size @property def output_size(self): return self._cell.output_size def zero_state(self, batch_size, dtype): with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]): return self._cell.zero_state(batch_size, dtype) def __call__(self, inputs, state, scope=None): """Run the cell on embedded inputs.""" with _checked_scope(self, scope or "embedding_wrapper", reuse=self._reuse): with ops.device("/cpu:0"): if self._initializer: initializer = self._initializer elif vs.get_variable_scope().initializer: initializer = vs.get_variable_scope().initializer else: # Default initializer for embeddings should have variance=1. sqrt3 = math.sqrt(3) # Uniform(-sqrt(3), sqrt(3)) has variance=1. 
initializer = init_ops.random_uniform_initializer(-sqrt3, sqrt3) if type(state) is tuple: data_type = state[0].dtype else: data_type = state.dtype embedding = vs.get_variable( "embedding", [self._embedding_classes, self._embedding_size], initializer=initializer, dtype=data_type) embedded = embedding_ops.embedding_lookup( embedding, array_ops.reshape(inputs, [-1])) return self._cell(embedded, state) class MultiRNNCell(RNNCell): """RNN cell composed sequentially of multiple simple cells.""" def __init__(self, cells, state_is_tuple=True): """Create a RNN cell composed sequentially of a number of RNNCells. Args: cells: list of RNNCells that will be composed in this order. state_is_tuple: If True, accepted and returned states are n-tuples, where `n = len(cells)`. If False, the states are all concatenated along the column axis. This latter behavior will soon be deprecated. Raises: ValueError: if cells is empty (not allowed), or at least one of the cells returns a state tuple but the flag `state_is_tuple` is `False`. """ if not cells: raise ValueError("Must specify at least one cell for MultiRNNCell.") if not nest.is_sequence(cells): raise TypeError( "cells must be a list or tuple, but saw: %s." % cells) self._cells = cells self._state_is_tuple = state_is_tuple if not state_is_tuple: if any(nest.is_sequence(c.state_size) for c in self._cells): raise ValueError("Some cells return tuples of states, but the flag " "state_is_tuple is not set. 
class _SlimRNNCell(RNNCell):
  """A simple wrapper for slim.rnn_cells."""

  def __init__(self, cell_fn):
    """Create a SlimRNNCell from a cell_fn.

    Args:
      cell_fn: a function which takes (inputs, state, scope) and produces the
        outputs and the new_state. Additionally when called with inputs=None
        and state=None it should return (initial_outputs, initial_state).
        The default scope name is read from `cell_fn.func.__name__` when
        `cell_fn` has a `func` attribute (e.g. a `functools.partial`), and
        from `cell_fn.__name__` otherwise.

    Raises:
      TypeError: if cell_fn is not callable
      ValueError: if cell_fn cannot produce a valid initial state.
    """
    if not callable(cell_fn):
      # The value is interpolated into the message; previously it was passed
      # as a second exception argument and "%s" was left unformatted.
      raise TypeError("cell_fn %s needs to be callable" % (cell_fn,))
    self._cell_fn = cell_fn
    # Partial-like objects expose the underlying function as `.func`; plain
    # callables are used directly.
    named_callable = getattr(cell_fn, "func", cell_fn)
    self._cell_name = named_callable.__name__
    # Probe the cell once to discover the static output and state widths.
    init_output, init_state = self._cell_fn(None, None)
    output_shape = init_output.get_shape()
    state_shape = init_state.get_shape()
    self._output_size = output_shape.with_rank(2)[1].value
    self._state_size = state_shape.with_rank(2)[1].value
    if self._output_size is None:
      raise ValueError("Initial output created by %s has invalid shape %s" %
                       (self._cell_name, output_shape))
    if self._state_size is None:
      raise ValueError("Initial state created by %s has invalid shape %s" %
                       (self._cell_name, state_shape))

  @property
  def state_size(self):
    return self._state_size

  @property
  def output_size(self):
    return self._output_size

  def __call__(self, inputs, state, scope=None):
    """Delegate to `cell_fn`, defaulting the scope to the cell's name."""
    scope = scope or self._cell_name
    output, state = self._cell_fn(inputs, state, scope=scope)
    return output, state
def sample_Z(batch_size, seq_length, latent_dim, use_time=False, use_noisy_time=False):
    """
    Draw a batch of latent noise for the generator.

    Returns a float32 array of shape [batch_size, seq_length, latent_dim]
    filled with standard-normal samples. With use_time=True the first latent
    channel is overwritten by a linear ramp from 0 to 1/seq_length (a warning
    is printed). use_noisy_time is accepted but not used here.
    """
    noise_shape = [batch_size, seq_length, latent_dim]
    latent = np.float32(np.random.normal(size=noise_shape))
    if not use_time:
        return latent
    print('WARNING: use_time has different semantics')
    time_ramp = np.linspace(0, 1.0 / seq_length, num=seq_length)
    latent[:, :, 0] = time_ramp
    return latent
""" # for batch_idx in range(0, int(len(samples) / batch_size) - (D_rounds + (cond_dim > 0) * G_rounds), D_rounds + (cond_dim > 0) * G_rounds): for batch_idx in range(0, int(len(samples) / batch_size) - (D_rounds + G_rounds), D_rounds + G_rounds): # update the discriminator X_mb, Y_mb = data_utils.get_batch(samples, batch_size, batch_idx, labels) Z_mb = sample_Z(batch_size, seq_length, latent_dim, use_time) for d in range(D_rounds): # run the discriminator solver _ = sess.run(D_solver, feed_dict={X: X_mb, Z: Z_mb}) # update the generator for g in range(G_rounds): # run the generator solver _ = sess.run(G_solver, feed_dict={Z: sample_Z(batch_size, seq_length, latent_dim, use_time=use_time)}) # at the end, get the loss D_loss_curr, G_loss_curr = sess.run([D_loss, G_loss], feed_dict={X: X_mb, Z: sample_Z(batch_size, seq_length, latent_dim, use_time=use_time)}) D_loss_curr = np.mean(D_loss_curr) G_loss_curr = np.mean(G_loss_curr) return D_loss_curr, G_loss_curr def GAN_loss(Z, X, generator_settings, discriminator_settings): # normal GAN G_sample = generator(Z, **generator_settings) D_real, D_logit_real = discriminator(X, **discriminator_settings) D_fake, D_logit_fake = discriminator(G_sample, reuse=True, **discriminator_settings) # Measures the probability error in discrete classification tasks in which each class is independent # and not mutually exclusive. # logits: predicted labels?? 
def GAN_solvers(D_loss, G_loss, learning_rate, batch_size, total_examples, l2norm_bound, batches_per_lot, sigma, dp=False):
    """
    Build the training ops for discriminator and generator.

    The discriminator is trained with plain gradient descent on the
    batch-mean loss, or (dp=True) with the differentially private optimizer
    on the un-averaged loss. The generator always uses Adam with default
    settings on the batch-mean loss.

    In the dp branch l2norm_bound is divided by batch_size and the
    batches_per_lot argument is replaced by 1; the eps/delta placeholders are
    created here and are not returned.

    Returns (D_solver, G_solver, priv_accountant); priv_accountant is None
    when dp is False.
    """
    trainables = tf.trainable_variables()
    discriminator_vars = [var for var in trainables if var.name.startswith('discriminator')]
    generator_vars = [var for var in trainables if var.name.startswith('generator')]

    if not dp:
        priv_accountant = None
        D_loss_mean_over_batch = tf.reduce_mean(D_loss)
        d_optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
        D_solver = d_optimizer.minimize(D_loss_mean_over_batch, var_list=discriminator_vars)
    else:
        print('Using differentially private SGD to train discriminator!')
        eps = tf.placeholder(tf.float32)
        delta = tf.placeholder(tf.float32)
        priv_accountant = accountant.GaussianMomentsAccountant(total_examples)
        per_example_bound = l2norm_bound / batch_size
        gaussian_sanitizer = sanitizer.AmortizedGaussianSanitizer(
            priv_accountant,
            [per_example_bound, True])
        # the trick is that we need to calculate the gradient with respect to
        # each example in the batch, during the DP SGD step
        d_optimizer = dp_optimizer.DPGradientDescentOptimizer(
            learning_rate,
            [eps, delta],
            sanitizer=gaussian_sanitizer,
            sigma=sigma,
            batches_per_lot=1)
        D_solver = d_optimizer.minimize(D_loss, var_list=discriminator_vars)

    G_loss_mean_over_batch = tf.reduce_mean(G_loss)
    g_optimizer = tf.train.AdamOptimizer()
    G_solver = g_optimizer.minimize(G_loss_mean_over_batch, var_list=generator_vars)
    return D_solver, G_solver, priv_accountant
def discriminator(x, hidden_units_d, seq_length, batch_size, reuse=False, parameters=None, batch_mean=False):
    """
    LSTM discriminator producing one output per time step.

    x:              input tensor fed to the LSTM
    hidden_units_d: number of LSTM units
    seq_length:     not used by this function (kept for signature compatibility)
    batch_size:     only used when batch_mean is True
    reuse:          reuse the variables of the "discriminator" scope
    parameters:     optional dict; when given, 'discriminator/W_out_D:0' and
                    'discriminator/b_out_D:0' initialise the output layer
    batch_mean:     concatenate the mean over the batch to every input

    Returns (output, logits) where output = sigmoid(logits).
    """
    with tf.variable_scope("discriminator") as scope:
        if reuse:
            scope.reuse_variables()
        if parameters is None:
            W_out_D_initializer = tf.truncated_normal_initializer()
            b_out_D_initializer = tf.truncated_normal_initializer()
        else:
            W_out_D_initializer = tf.constant_initializer(value=parameters['discriminator/W_out_D:0'])
            b_out_D_initializer = tf.constant_initializer(value=parameters['discriminator/b_out_D:0'])
        # Always create real variables. Previously the "parameters" branch bound
        # W_out_D / b_out_D to the initializer objects themselves, which cannot
        # be used in the einsum / addition below.
        W_out_D = tf.get_variable(name='W_out_D', shape=[hidden_units_d, 1],
                                  initializer=W_out_D_initializer)
        b_out_D = tf.get_variable(name='b_out_D', shape=1,
                                  initializer=b_out_D_initializer)

        # inputs
        inputs = x
        # add the average of the inputs to the inputs (mode collapse?)
        if batch_mean:
            mean_over_batch = tf.stack([tf.reduce_mean(x, axis=0)] * batch_size, axis=0)
            inputs = tf.concat([x, mean_over_batch], axis=2)

        cell = tf.contrib.rnn.LSTMCell(num_units=hidden_units_d,
                                       state_is_tuple=True,
                                       reuse=reuse)
        rnn_outputs, rnn_states = tf.nn.dynamic_rnn(
            cell=cell,
            dtype=tf.float32,
            inputs=inputs)
        # weighted sum of the LSTM outputs at every time step
        logits = tf.einsum('ijk,km', rnn_outputs, W_out_D) + b_out_D
        # squash to (0, 1)
        output = tf.nn.sigmoid(logits)
    return output, logits


# --- display ----#
def display_batch_progression(j, id_max):
    '''
    See epoch progression: write "<percent> % epoch" followed by a carriage
    return to stdout, so successive calls overwrite the same terminal line.
    '''
    import sys  # local import: the module's import block is not visible from here

    batch_progression = int((j / id_max) * 100)
    sys.stdout.write(str(batch_progression) + ' % epoch' + chr(13))
    # the original referenced sys.stdout.flush without calling it
    sys.stdout.flush()


# --- to do with saving/loading --- #
def dump_parameters(identifier, sess):
    """
    Save model parameters to a numpy file.

    Every trainable variable is evaluated in sess and stored, keyed by its
    variable name, in ./experiments/parameters/<identifier>.npy.
    Returns True.
    """
    dump_path = './experiments/parameters/' + identifier + '.npy'
    model_parameters = dict()
    for v in tf.trainable_variables():
        model_parameters[v.name] = sess.run(v)
    np.save(dump_path, model_parameters)
    print('Recorded', len(model_parameters), 'parameters to', dump_path)
    return True


def load_parameters(identifier):
    """
    Load parameters from a numpy file.

    identifier is used directly as the path handed to np.load (no directory or
    extension is added). The file holds a pickled dict wrapped in a 0-d object
    array, so allow_pickle=True is required on NumPy versions where it
    defaults to False.
    NOTE: unpickling executes arbitrary code - only load files you trust.
    """
    model_parameters = np.load(identifier, allow_pickle=True).item()
    return model_parameters
def save_plot_sample(samples, idx, identifier, n_samples=16, num_epochs=None, ncol=4):
    """
    Plot the first n_samples entries of samples on a grid with ncol columns and
    save it to ./experiments/plots/gs/<identifier>_epoch<idx zero-padded>.png.

    samples:    array indexed as samples[i, :, :]; axis 1 is plotted along x
    idx:        used as figure title and in the file name
    num_epochs: when given, the line colour is derived from idx / num_epochs,
                otherwise grey is used
    n_samples must not exceed samples.shape[0] and must be a multiple of ncol.
    """
    assert n_samples <= samples.shape[0]
    # NOTE(review): visualise_at_epoch calls this with n_samples=6 and the
    # default ncol=4, which trips the assertion below - confirm the intended grid.
    assert n_samples % ncol == 0
    sample_length = samples.shape[1]
    if num_epochs is not None:
        # convert hsv values to rgb; all values assumed to be in range [0, 1]
        col = hsv_to_rgb((1, 1.0*(idx)/num_epochs, 0.8))
    else:
        col = 'grey'

    x_points = np.arange(sample_length)
    nrow = n_samples // ncol
    # squeeze=False keeps axarr two-dimensional even for a single row/column,
    # so the axarr[m, n] indexing below always works.
    fig, axarr = plt.subplots(nrow, ncol, sharex=True, figsize=(6, 6), squeeze=False)
    for m in range(nrow):
        for n in range(ncol):
            # samples fill the grid column by column
            sample = samples[n*nrow + m, :, :]
            axarr[m, n].plot(x_points, sample, color=col)
    # a step of 0 (sequences shorter than 4) would make range() raise
    tick_step = max(1, sample_length // 4)
    for n in range(ncol):
        axarr[-1, n].xaxis.set_ticks(range(0, sample_length, tick_step))
    fig.suptitle(idx)
    fig.subplots_adjust(hspace=0.15)
    fig.savefig("./experiments/plots/gs/" + identifier + "_epoch" + str(idx).zfill(4) + ".png")
    plt.clf()
    # close the figure created here rather than whichever one is current
    plt.close(fig)
    return
def reconstruction_errors(identifier, train_errors, vali_errors, generated_errors, random_errors):
    """
    Plot four stacked, normalised histograms of reconstruction errors (train,
    vali, generated, random) and save them to
    ./experiments/plots/<identifier>_reconstruction_errors.png.
    Returns True.
    """
    print(identifier)
    panels = [
        (train_errors, 'green', "train reconstruction errors"),
        (vali_errors, 'blue', 'vali reconstruction errors'),
        (generated_errors, 'pink', 'generated reconstruction errors'),
        (random_errors, 'grey', 'random reconstruction errors'),
    ]
    fig, axarr = plt.subplots(4, 1, sharex=True, figsize=(4, 8))
    for ax, (errors, colour, title) in zip(axarr, panels):
        # "normed" was removed from Axes.hist; "density" is its replacement
        ax.hist(errors, density=True, color=colour, bins=50)
        ax.set_title(title)
    for ax in axarr:
        for side in ("top", "bottom", "right", "left"):
            ax.spines[side].set_visible(False)
        # tick_params expects booleans; the string 'off' is truthy
        ax.tick_params(bottom=False, left=False)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    axarr[3].set_xlim(0, 0.05)
    fig.tight_layout()
    fig.savefig('./experiments/plots/' + identifier + '_reconstruction_errors.png')
    # release the figure so it does not leak into later pyplot calls
    plt.close(fig)
    return True
def interpolate(sampleA, sampleB=None, n_steps=6):
    """
    Linearly interpolate between two latent space points.

    Returns an array with n_steps entries going from sampleA (weight 0) to
    sampleB (weight 1). When sampleB is None, a point "close by" is drawn by
    adding Gaussian noise (scale 0.05) to sampleA.
    """
    weights = np.linspace(0, 1, n_steps)
    if sampleB is None:
        # do it "close by"
        sampleB = sampleA + np.random.normal(size=sampleA.shape, scale=0.05)
    samples = np.array([w*sampleB + (1-w)*sampleA for w in weights])
    return samples


def vary_latent_dimension(sample, dimension, n_steps=6):
    """
    Return n_steps copies of a 2-D sample in which column `dimension` is
    shifted by evenly spaced offsets from 0 to twice the mean absolute value
    of that column. The input array is not modified.
    """
    # valid column indices stop at shape[1] - 1; the previous "<=" check let
    # dimension == shape[1] through to an IndexError further down
    assert dimension < sample.shape[1]
    scale = np.mean(np.abs(sample[:, dimension]))
    deviations = np.linspace(0, 2*scale, n_steps)
    # stack n_steps independent copies of the sample (dtype is preserved)
    samples = np.repeat(sample[np.newaxis, :, :], n_steps, axis=0)
    # add one offset per copy to the selected column
    samples[:, :, dimension] += deviations[:, np.newaxis]
    return samples
""" ### frequency seq_length = len(real_samples[0]) # assumes samples are all the same length frate = seq_length freqs_hz = np.fft.rfftfreq(seq_length)*frate # this is for labelling the plot # TODO, just taking axis 0 for now... w_real = np.mean(np.abs(np.fft.rfft(real_samples[:, :, 0])), axis=0) w_fake = np.mean(np.abs(np.fft.rfft(fake_samples[:, :, 0])), axis=0) ### amplitude A_real = np.max(np.abs(real_samples[:, :, 0]), axis=1) A_fake = np.max(np.abs(fake_samples[:, :, 0]), axis=1) ### now plot nrow = 2 ncol = 2 fig, axarr = plt.subplots(nrow, ncol, sharex='col', figsize=(6, 6)) # freq axarr[0, 0].vlines(freqs_hz, ymin=np.minimum(np.zeros_like(w_real), w_real), ymax=np.maximum(np.zeros_like(w_real), w_real), color='#30ba50') axarr[0, 0].set_title("frequency", fontsize=16) axarr[0, 0].set_ylabel("real", fontsize=16) axarr[1, 0].vlines(freqs_hz, ymin=np.minimum(np.zeros_like(w_fake), w_fake), ymax=np.maximum(np.zeros_like(w_fake), w_fake), color='#ba4730') axarr[1, 0].set_ylabel("generated", fontsize=16) # amplitude axarr[0, 1].hist(A_real, normed=True, color='#30ba50', bins=30) axarr[0, 1].set_title("amplitude", fontsize=16) axarr[1, 1].hist(A_fake, normed=True, color='#ba4730', bins=30) fig.savefig('./experiments/plots/' + identifier + '_eval' + str(idx).zfill(4) +'.png') plt.clf() plt.close() return True def plot_trace(identifier, xmax=250, final=False, dp=False): """ """ trace_path = './experiments/traces/' + identifier + '.trace.txt' da = read_table(trace_path, sep=' ') nrow = 3 if dp: trace_dp_path = './experiments/traces/' + identifier + '.dptrace.txt' da_dp = read_table(trace_dp_path, sep=' ') nrow += 1 ncol=1 fig, axarr = plt.subplots(nrow, ncol, sharex='col', figsize=(6, 6)) # D_loss d_handle, = axarr[0].plot(da.epoch, da.D_loss, color='red', label='discriminator') axarr[0].set_ylabel('D loss') # axarr[0].set_ylim(0.9, 1.6) if final: #D_ticks = [1.0, 1.2, 1.5] D_ticks = [0.5, 1.0, 1.5] axarr[0].get_yaxis().set_ticks(D_ticks) for tick in D_ticks: 
axarr[0].plot((-10, xmax+10), (tick, tick), ls='dotted', lw=0.5, color='black', alpha=0.4, zorder=0) # G loss ax_G = axarr[0].twinx() g_handle, = ax_G.plot(da.epoch, da.G_loss, color='green', ls='dashed', label='generator') ax_G.set_ylabel('G loss') if final: G_ticks = [2.5, 5] ax_G.get_yaxis().set_ticks(G_ticks) # for tick in G_ticks: # axarr[0].plot((-10, xmax+10), (tick, tick), ls='dotted', lw=0.5, color='green', alpha=1.0, zorder=0) ax_G.spines["top"].set_visible(False) ax_G.spines["bottom"].set_visible(False) ax_G.spines["right"].set_visible(False) ax_G.spines["left"].set_visible(False) ax_G.tick_params(bottom='off', right='off') axarr[0].legend(handles=[d_handle, g_handle], labels=['discriminator', 'generator']) # mmd da_mmd = da.loc[:, ['epoch', 'mmd2']].dropna() axarr[1].plot(da_mmd.epoch, da_mmd.mmd2, color='purple') axarr[1].set_ylabel('MMD$^2$') #axarr[1].set_ylim(0.0, 0.04) #ax_that = axarr[1].twinx() #ax_that.plot(da.that) #ax_that.set_ylabel('$\hat{t}$') #ax_that.set_ylim(0, 50) if final: mmd_ticks = [0.01, 0.02, 0.03] axarr[1].get_yaxis().set_ticks(mmd_ticks) for tick in mmd_ticks: axarr[1].plot((-10, xmax+10), (tick, tick), ls='dotted', lw=0.5, color='black', alpha=0.4, zorder=0) # log likelihood da_ll = da.loc[:, ['epoch', 'll', 'real_ll']].dropna() axarr[2].plot(da_ll.epoch, da_ll.ll, color='orange') axarr[2].plot(da_ll.epoch, da_ll.real_ll, color='orange', alpha=0.5) axarr[2].set_ylabel('likelihood') axarr[2].set_xlabel('epoch') axarr[2].set_ylim(-750, 100) #axarr[2].set_ylim(-10000000, 500) if final: # ll_ticks = [-1.0*1e7, -0.5*1e7, 0] ll_ticks = [-500 ,-250, 0] axarr[2].get_yaxis().set_ticks(ll_ticks) for tick in ll_ticks: axarr[2].plot((-10, xmax+10), (tick, tick), ls='dotted', lw=0.5, color='black', alpha=0.4, zorder=0) if dp: assert da_dp.columns[0] == 'epoch' epochs = da_dp['epoch'] eps_values = da_dp.columns[1:] for eps_string in eps_values: if 'eps' in eps_string: eps = eps_string[3:] else: eps = eps_string deltas = da_dp[eps_string] 
def save_mnist_plot_sample(samples, idx, identifier, n_samples, labels=None):
    """
    Generates a two-column grid showing mnist digits and saves it to
    ./experiments/plots/<identifier>_epoch<idx zero-padded>.png.

    samples:   indexed as samples[i, :, 0]; each entry is reshaped to a square
               image whose side is int(sqrt(samples.shape[1]))
    n_samples: even number of digits to show (first half fills the left
               column, second half the right column)
    labels:    optional titles; arrays with more than one column are treated
               as one-hot and reduced with argmax, None shows 'NA'
    """
    assert n_samples <= samples.shape[0]
    if labels is not None:
        assert n_samples <= len(labels)
        if len(labels.shape) > 1 and not labels.shape[1] == 1:
            # one-hot
            label_titles = np.argmax(labels, axis=1)
        else:
            label_titles = labels
    else:
        label_titles = ['NA']*n_samples
    assert n_samples % 2 == 0
    img_size = int(np.sqrt(samples.shape[1]))
    nrow = n_samples // 2
    ncol = 2
    # squeeze=False keeps axarr 2-D when n_samples == 2 (a single row)
    fig, axarr = plt.subplots(nrow, ncol, sharex=True, figsize=(8, 8), squeeze=False)
    for m in range(nrow):
        for column in range(ncol):
            # column 0 shows samples [0, nrow), column 1 shows [nrow, 2*nrow)
            position = column*nrow + m
            sample = samples[position, :, 0]
            axarr[m, column].imshow(sample.reshape([img_size, img_size]), cmap='gray')
            axarr[m, column].set_title(str(label_titles[position]))
    # the title was previously set twice; once is enough
    fig.suptitle(idx)
    fig.subplots_adjust(hspace=0.15)
    fig.savefig("./experiments/plots/" + identifier + "_epoch" + str(idx).zfill(4) + ".png")
    plt.clf()
    # close the figure created here rather than whichever one is current
    plt.close(fig)
    return
def _plot_digit_row(images, values, first_position, heading):
    """Draw up to four images on a 4x4 grid starting at subplot first_position;
    the first title is prefixed with heading, the others show the value only."""
    for index, (image, value) in enumerate(list(zip(images, values))[:4]):
        plt.subplot(4, 4, index + first_position)
        plt.axis('off')
        plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
        if index == 0:
            plt.title((heading + ': %i') % value)
        else:
            plt.title('%i' % value)


### TSTR ###
def view_mnist_eval(identifier, train_X, train_Y, synth_X, synth_Y, test_X, test_Y, synth_predY, real_predY):
    """
    Basically just
    http://scikit-learn.org/stable/auto_examples/classification/plot_digits_classification.html

    Draws a 4x4 grid: row 1 synthetic training digits with their labels, row 2
    real training digits with their labels, rows 3 and 4 test digits titled
    with synth_predY and real_predY respectively. The figure is saved to
    ./experiments/tstr/<identifier>_preds.png. test_Y is not used.
    Returns True.
    """
    # resize everything to square images; this only rebinds the local names
    side_length = int(np.sqrt(train_X.shape[1]))
    train_X = train_X.reshape(-1, side_length, side_length)
    synth_X = synth_X.reshape(-1, side_length, side_length)
    test_X = test_X.reshape(-1, side_length, side_length)

    _plot_digit_row(synth_X, synth_Y, 1, 'synth train')
    _plot_digit_row(train_X, train_Y, 5, 'real train')
    _plot_digit_row(test_X, synth_predY, 9, 'synth pred')
    _plot_digit_row(test_X, real_predY, 13, 'real pred')

    plt.tight_layout()
    # NOTE(review): plt.title targets the current (last drawn) subplot, so this
    # replaces that subplot's prediction title - a figure-level suptitle may
    # have been intended; confirm before changing.
    plt.title(identifier)
    plt.savefig('./experiments/tstr/' + identifier + '_preds.png')
    # clear and close so these subplots do not leak into later pyplot calls
    plt.clf()
    plt.close()
    return True
def _str2bool(value):
    """Parse a command line boolean ('true'/'false', 'yes'/'no', '1'/'0', ...)."""
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('expected a boolean, got %r' % (value,))


def rgan_options_parser():
    """
    Define parser to parse options from command line, with defaults.
    Refer to this function for definitions of various variables.

    Boolean options take an explicit value (e.g. ``--shuffle False``). They are
    parsed with _str2bool because ``type=bool`` turns every non-empty string,
    including "False", into True.
    """
    parser = argparse.ArgumentParser(description='Train a GAN to generate sequential, real-valued data.')
    # meta-option
    parser.add_argument('--settings_file', help='json file of settings, overrides everything else', type=str, default='')
    # options pertaining to data
    parser.add_argument('--data', help='what kind of data to train with?',
                        default='gp_rbf',
                        choices=['gp_rbf', 'sine', 'mnist', 'load'])
    # parser.add_argument('--num_samples', type=int, help='how many training examples to generate?', default=28*5*100)
    # parser.add_argument('--num_samples_t', type=int, help='how many testing examples for anomaly detection?', default=28 * 5 * 100)
    parser.add_argument('--seq_length', type=int, default=30)
    parser.add_argument('--num_signals', type=int, default=1)
    parser.add_argument('--normalise', type=_str2bool, default=False,
                        help='normalise the training/vali/test data (during split)?')
    # parser.add_argument('--AD', type=bool, default=False, help='should we conduct anomaly detection?')

    ### for gp_rbf
    parser.add_argument('--scale', type=float, default=0.1)
    ### for sin (should be using subparsers for this...)
    parser.add_argument('--freq_low', type=float, default=1.0)
    parser.add_argument('--freq_high', type=float, default=5.0)
    parser.add_argument('--amplitude_low', type=float, default=0.1)
    parser.add_argument('--amplitude_high', type=float, default=0.9)
    ### for mnist
    parser.add_argument('--multivariate_mnist', type=_str2bool, default=False)
    parser.add_argument('--full_mnist', type=_str2bool, default=False)
    ### for loading
    parser.add_argument('--data_load_from', type=str, default='')
    ### for eICU
    parser.add_argument('--resample_rate_in_min', type=int, default=15)

    # hyperparameters of the model
    parser.add_argument('--hidden_units_g', type=int, default=100)
    parser.add_argument('--hidden_units_d', type=int, default=100)
    parser.add_argument('--hidden_units_e', type=int, default=100)
    parser.add_argument('--kappa', type=float, default=1,
                        help='weight between final output '
                             'and intermediate steps in discriminator cost (1 = all '
                             'intermediate')
    parser.add_argument('--latent_dim', type=int, default=5,
                        help='dimensionality of the latent/noise space')
    # the default is fractional, so the option has to be parsed as a float
    parser.add_argument('--weight', type=float, default=0.5, help='weight of score')
    parser.add_argument('--degree', type=int, default=1, help='norm degree')
    parser.add_argument('--batch_mean', type=_str2bool, default=False,
                        help='append the mean '
                             'of the batch to all variables for calculating discriminator loss')
    parser.add_argument('--learn_scale', type=_str2bool, default=False,
                        help='make the '
                             '"scale" parameter at the output of the generator learnable (else fixed '
                             'to 1')

    # options pertaining to training
    parser.add_argument('--learning_rate', type=float, default=0.1)
    parser.add_argument('--batch_size', type=int, default=28)
    parser.add_argument('--num_epochs', type=int, default=100)
    parser.add_argument('--D_rounds', type=int, default=5,
                        help='number of rounds of discriminator training')
    parser.add_argument('--G_rounds', type=int, default=1,
                        help='number of rounds of generator training')
    parser.add_argument('--E_rounds', type=int, default=1,
                        help='number of rounds of encoder training')
    # parser.add_argument('--use_time', type=bool, default=False, help='enforce latent dimension 0 to correspond to time')
    parser.add_argument('--shuffle', type=_str2bool, default=True)
    parser.add_argument('--eval_mul', type=_str2bool, default=False)
    parser.add_argument('--eval_an', type=_str2bool, default=False)
    parser.add_argument('--eval_single', type=_str2bool, default=False)
    parser.add_argument('--wrong_labels', type=_str2bool, default=False,
                        help='augment '
                             'discriminator loss with real examples with wrong (~shuffled, sort of) labels')

    # options pertaining to evaluation and exploration
    parser.add_argument('--identifier', type=str, default='test',
                        help='identifier string for output files')
    parser.add_argument('--sub_id', type=str, default='test',
                        help='identifier string for load parameters')

    # options pertaining to differential privacy
    parser.add_argument('--dp', type=_str2bool, default=False,
                        help='train discriminator with differentially private SGD?')
    parser.add_argument('--l2norm_bound', type=float, default=1e-5,
                        help='bound on norm of individual gradients for DP training')
    parser.add_argument('--batches_per_lot', type=int, default=1,
                        help='number of batches per lot (for DP)')
    parser.add_argument('--dp_sigma', type=float, default=1e-5,
                        help='sigma for noise added (for DP)')
    return parser


def load_settings_from_file(settings):
    """
    Handle loading settings from a JSON file, filling in missing
    settings from the command line defaults, but otherwise overwriting them.

    The file read is ./experiments/settings/<settings['settings_file']>.txt.
    The passed-in dict is updated in place and also returned.
    """
    settings_path = './experiments/settings/' + settings['settings_file'] + '.txt'
    print('Loading settings from', settings_path)
    # the context manager closes the file even if parsing fails
    with open(settings_path, 'r') as settings_handle:
        settings_loaded = json.load(settings_handle)
    # check for settings missing in file
    for key in settings.keys():
        if key not in settings_loaded:
            print(key, 'not found in loaded settings - adopting value from command line defaults: ', settings[key])
    # overwrite parsed/default settings with those read from file, allowing for
    # (potentially new) default settings not present in file
    settings.update(settings_loaded)
    return settings