Showing preview only (260K chars total). Download the full file or copy to clipboard to get everything.
Repository: LiDan456/MAD-GANs
Branch: master
Commit: 3139a73a4112
Files: 45
Total size: 249.0 KB
Directory structure:
gitextract_fejrc7rh/
├── .gitattributes
├── AD.py
├── AD_Invert.py
├── DR_discriminator.py
├── README.md
├── RGAN.py
├── data_utils.py
├── differential_privacy/
│ ├── dp_sgd/
│ │ └── dp_optimizer/
│ │ ├── dp_optimizer.py
│ │ ├── sanitizer.py
│ │ └── utils.py
│ └── privacy_accountant/
│ └── tf/
│ └── accountant.py
├── eugenium_mmd.py
├── eval.py
├── experiments/
│ ├── parameters/
│ │ ├── kdd99_30_0.npy
│ │ ├── kdd99_30_1.npy
│ │ ├── kdd99_30_10.npy
│ │ ├── kdd99_30_11.npy
│ │ ├── kdd99_30_12.npy
│ │ ├── kdd99_30_13.npy
│ │ ├── kdd99_30_14.npy
│ │ ├── kdd99_30_15.npy
│ │ ├── kdd99_30_16.npy
│ │ ├── kdd99_30_17.npy
│ │ ├── kdd99_30_18.npy
│ │ ├── kdd99_30_19.npy
│ │ ├── kdd99_30_2.npy
│ │ ├── kdd99_30_20.npy
│ │ ├── kdd99_30_21.npy
│ │ ├── kdd99_30_22.npy
│ │ ├── kdd99_30_3.npy
│ │ ├── kdd99_30_4.npy
│ │ ├── kdd99_30_5.npy
│ │ ├── kdd99_30_6.npy
│ │ ├── kdd99_30_7.npy
│ │ ├── kdd99_30_8.npy
│ │ └── kdd99_30_9.npy
│ ├── plots/
│ │ └── gs/
│ │ └── kdd99_gs_real.npy
│ └── settings/
│ ├── kdd99.txt
│ └── kdd99_test.txt
├── mmd.py
├── mod_core_rnn_cell_impl.py
├── model.py
├── plotting.py
├── tf_ops.py
└── utils.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitattributes
================================================
# Auto detect text files and perform LF normalization
* text=auto
================================================
FILE: AD.py
================================================
import tensorflow as tf
import numpy as np
import pdb
import json
import model
from mod_core_rnn_cell_impl import LSTMCell # modified to allow initializing bias in lstm
import utils
import eval
import DR_discriminator
import data_utils
# from pyod.utils.utility import *
from sklearn.utils.validation import *
from sklearn.metrics.classification import *
from sklearn.metrics.ranking import *
from time import time
begin = time()  # wall-clock start for the final timing report
"""
Here, only the discriminator was used to do the anomaly detection
"""
# --- get settings --- #
# parse command line arguments, or use defaults
parser = utils.rgan_options_parser()
settings = vars(parser.parse_args())
# if a settings file is specified, it overrides command line arguments/defaults
if settings['settings_file']: settings = utils.load_settings_from_file(settings)
# --- get data, split --- #
data_path = './experiments/data/' + settings['data_load_from'] + '.data.npy'
print('Loading data from', data_path)
# evaluate the full multivariate test stream (not one channel, not the
# anomaly-only subset)
settings["eval_single"] = False
settings["eval_an"] = False
samples, labels, index = data_utils.get_data(settings["data"], settings["seq_length"], settings["seq_step"],
                                             settings["num_signals"], settings["sub_id"], settings["eval_single"],
                                             settings["eval_an"], data_path)
# --- save settings, data --- #
# no need
print('Ready to run with settings:')
for (k, v) in settings.items(): print(v, '\t', k)
# add the settings to local environment
# WARNING: at this point a lot of variables appear
locals().update(settings)
# `identifier` is one of the names injected by locals().update(settings)
json.dump(settings, open('./experiments/settings/' + identifier + '.txt', 'w'), indent=0)
class myADclass():
    """Anomaly-detection driver for one training epoch: scores every test
    window with the trained discriminator and sweeps detection thresholds.
    """
    def __init__(self, epoch, settings=settings, samples=samples, labels=labels, index=index):
        # NOTE(review): the defaults bind the module-level objects at class
        # definition time; the script only ever passes `epoch`.
        self.epoch = epoch          # epoch whose saved parameters are evaluated
        self.settings = settings    # experiment settings dict
        self.samples = samples      # test windows, presumably (N, seq_length, num_signals) -- TODO confirm
        self.labels = labels        # point-wise anomaly labels per window
        self.index = index          # original time index of each window point
    def ADfunc(self):
        """Run the trained discriminator over all test windows and report
        detection metrics for a sweep of thresholds tao in {0.2 .. 0.7}.

        Returns:
            results: (18, 4) array of [Accu, Pre, Rec, F1]; rows 0-5 are
                logit-based, 6-11 probability-based, 12-17 sample-wise,
                one row per tao value.
        """
        num_samples_t = self.samples.shape[0]
        print('sample_shape:', self.samples.shape[0])
        print('num_samples_t', num_samples_t)
        # -- only discriminate one batch for one time -- #
        # per-point outputs accumulated over the whole test set
        D_test = np.empty([num_samples_t, self.settings['seq_length'], 1])
        DL_test = np.empty([num_samples_t, self.settings['seq_length'], 1])
        L_mb = np.empty([num_samples_t, self.settings['seq_length'], 1])
        I_mb = np.empty([num_samples_t, self.settings['seq_length'], 1])
        batch_times = num_samples_t // self.settings['batch_size']
        for batch_idx in range(0, num_samples_t // self.settings['batch_size']):
            # print('batch_idx:{}
            # display batch progress
            model.display_batch_progression(batch_idx, batch_times)
            start_pos = batch_idx * self.settings['batch_size']
            end_pos = start_pos + self.settings['batch_size']
            T_mb = self.samples[start_pos:end_pos, :, :]
            L_mmb = self.labels[start_pos:end_pos, :, :]
            I_mmb = self.index[start_pos:end_pos, :, :]
            # parameters were saved as <sub_id>_<seq_length>_<epoch>.npy
            para_path = './experiments/parameters/' + self.settings['sub_id'] + '_' + str(
                self.settings['seq_length']) + '_' + str(self.epoch) + '.npy'
            D_t, L_t = DR_discriminator.dis_trained_model(self.settings, T_mb, para_path)
            D_test[start_pos:end_pos, :, :] = D_t
            DL_test[start_pos:end_pos, :, :] = L_t
            L_mb[start_pos:end_pos, :, :] = L_mmb
            I_mb[start_pos:end_pos, :, :] = I_mmb
        # remainder batch: pad with ones up to batch_size, keep only the
        # real `size` rows of the outputs
        start_pos = (num_samples_t // self.settings['batch_size']) * self.settings['batch_size']
        end_pos = start_pos + self.settings['batch_size']
        # NOTE(review): uses the module-level `samples` here instead of
        # self.samples -- same object in this script, but inconsistent.
        size = samples[start_pos:end_pos, :, :].shape[0]
        fill = np.ones([self.settings['batch_size'] - size, samples.shape[1], samples.shape[2]])
        batch = np.concatenate([samples[start_pos:end_pos, :, :], fill], axis=0)
        para_path = './experiments/parameters/' + self.settings['sub_id'] + '_' + str(
            self.settings['seq_length']) + '_' + str(self.epoch) + '.npy'
        D_t, L_t = DR_discriminator.dis_trained_model(self.settings, batch, para_path)
        L_mmb = self.labels[start_pos:end_pos, :, :]
        I_mmb = self.index[start_pos:end_pos, :, :]
        D_test[start_pos:end_pos, :, :] = D_t[:size, :, :]
        DL_test[start_pos:end_pos, :, :] = L_t[:size, :, :]
        L_mb[start_pos:end_pos, :, :] = L_mmb
        I_mb[start_pos:end_pos, :, :] = I_mmb
        # threshold sweep: tao = 0.2 .. 0.7
        results = np.zeros([18, 4])
        for i in range(2, 8):
            tao = 0.1 * i
            # logit-based combined (point-wise) detection
            Accu2, Pre2, Rec2, F12 = DR_discriminator.detection_Comb(
                DL_test, L_mb, I_mb, self.settings['seq_step'], tao)
            print('seq_length:', self.settings['seq_length'])
            print('Comb-logits-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}'
                  .format(self.epoch, tao, Accu2, Pre2, Rec2, F12))
            results[i - 2, :] = [Accu2, Pre2, Rec2, F12]
            # probability-based combined detection
            Accu3, Pre3, Rec3, F13 = DR_discriminator.detection_Comb(
                D_test, L_mb, I_mb, self.settings['seq_step'], tao)
            print('seq_length:', self.settings['seq_length'])
            print('Comb-statistic-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}'
                  .format(self.epoch, tao, Accu3, Pre3, Rec3, F13))
            results[i - 2+6, :] = [Accu3, Pre3, Rec3, F13]
            # whole-window (sample-wise) detection
            Accu5, Pre5, Rec5, F15 = DR_discriminator.sample_detection(D_test, L_mb, tao)
            print('seq_length:', self.settings['seq_length'])
            print('sample-wise-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}'
                  .format(self.epoch, tao, Accu5, Pre5, Rec5, F15))
            results[i - 2+12, :] = [Accu5, Pre5, Rec5, F15]
        return results
if __name__ == "__main__":
    print('Main Starting...')
    # one (18, 4) metric table per training epoch
    Results = np.empty([settings['num_epochs'], 18, 4])
    for epoch in range(settings['num_epochs']):
        # for epoch in range(50, 60):
        ob = myADclass(epoch)
        Results[epoch, :, :] = ob.ADfunc()
    # res_path = './experiments/plots/Results' + '_' + settings['sub_id'] + '_' + str(
    #     settings['seq_length']) + '.npy'
    # np.save(res_path, Results)
    print('Main Terminating...')
    end = time() - begin
    # label says "Training time" but this measures the whole evaluation run
    print('Testing terminated | Training time=%d s' % (end))
================================================
FILE: AD_Invert.py
================================================
import tensorflow as tf
import numpy as np
import pdb
import json
from mod_core_rnn_cell_impl import LSTMCell # modified to allow initializing bias in lstm
import data_utils
import plotting
import model
import mmd
import utils
import eval
import DR_discriminator
from differential_privacy.dp_sgd.dp_optimizer import dp_optimizer
from differential_privacy.dp_sgd.dp_optimizer import sanitizer
from differential_privacy.privacy_accountant.tf import accountant
"""
Here, both the discriminator and generator were used to do the anomaly detection
"""
# --- get settings --- #
# parse command line arguments, or use defaults
parser = utils.rgan_options_parser()
settings = vars(parser.parse_args())
# if a settings file is specified, it overrides command line arguments/defaults
if settings['settings_file']: settings = utils.load_settings_from_file(settings)
# --- get data, split --- #
data_path = './experiments/data/' + settings['data_load_from'] + '.data.npy'
print('Loading data from', data_path)
samples, labels, index = data_utils.get_data(settings["data"], settings["seq_length"], settings["seq_step"],
settings["num_signals"], settings["sub_id"], settings["eval_single"],
settings["eval_an"], data_path)
# --- save settings, data --- #
# no need
print('Ready to run with settings:')
for (k, v) in settings.items(): print(v, '\t', k)
# add the settings to local environment
# WARNING: at this point a lot of variables appear
locals().update(settings)
json.dump(settings, open('./experiments/settings/' + identifier + '.txt', 'w'), indent=0)
class myADclass():
    """Anomaly-detection driver that combines discriminator scores with
    generator-based reconstruction (latent-space inversion) at one epoch.
    """
    def __init__(self, epoch, settings=settings, samples=samples, labels=labels, index=index):
        # NOTE(review): defaults bind the module-level objects at class
        # definition time; the script only ever passes `epoch`.
        self.epoch = epoch
        self.settings = settings
        self.samples = samples
        self.labels = labels
        self.index = index
    def ADfunc(self):
        """Score 500 randomly chosen test windows with the discriminator and
        with generator inversion, then evaluate five detection strategies at
        fixed tao/lam.

        Returns:
            (results, GG, D_test, DL_test): a (5, 5) table of
            [Accu, Pre, Rec, F1, FPR] per strategy, the reconstructed
            windows, and the per-point probabilities and logits.
        """
        num_samples_t = self.samples.shape[0]
        t_size = 500  # number of windows sampled; inversion is slow
        T_index = np.random.choice(num_samples_t, size=t_size, replace=False)
        print('sample_shape:', self.samples.shape[0])
        print('num_samples_t', num_samples_t)
        # -- only discriminate one batch for one time -- #
        D_test = np.empty([t_size, self.settings['seq_length'], 1])
        DL_test = np.empty([t_size, self.settings['seq_length'], 1])
        GG = np.empty([t_size, self.settings['seq_length'], self.settings['num_signals']])
        T_samples = np.empty([t_size, self.settings['seq_length'], self.settings['num_signals']])
        L_mb = np.empty([t_size, self.settings['seq_length'], 1])
        I_mb = np.empty([t_size, self.settings['seq_length'], 1])
        for batch_idx in range(0, t_size):
            # print('epoch:{}'.format(self.epoch))
            # print('batch_idx:{}'.format(batch_idx))
            # display batch progress
            model.display_batch_progression(batch_idx, t_size)
            T_mb = self.samples[T_index[batch_idx], :, :]
            L_mmb = self.labels[T_index[batch_idx], :, :]
            I_mmb = self.index[T_index[batch_idx], :, :]
            para_path = './experiments/parameters/' + self.settings['sub_id'] + '_' + str(
                self.settings['seq_length']) + '_' + str(self.epoch) + '.npy'
            D_t, L_t = DR_discriminator.dis_D_model(self.settings, T_mb, para_path)
            # gradient-descent search for the latent Z that reproduces T_mb
            Gs, Zs, error_per_sample, heuristic_sigma = DR_discriminator.invert(self.settings, T_mb, para_path,
                                                                                g_tolerance=None,
                                                                                e_tolerance=0.1, n_iter=None,
                                                                                max_iter=1000,
                                                                                heuristic_sigma=None)
            GG[batch_idx, :, :] = Gs
            T_samples[batch_idx, :, :] = T_mb
            D_test[batch_idx, :, :] = D_t
            DL_test[batch_idx, :, :] = L_t
            L_mb[batch_idx, :, :] = L_mmb
            I_mb[batch_idx, :, :] = I_mmb
        # -- use self-defined evaluation functions -- #
        # -- test different tao values for the detection function -- #
        results = np.empty([5, 5])
        # for i in range(2, 8):
        #     tao = 0.1 * i
        tao = 0.5   # detection threshold
        lam = 0.8   # weight of the discriminator term vs reconstruction
        # 1) discriminator logits only
        Accu1, Pre1, Rec1, F11, FPR1, D_L1 = DR_discriminator.detection_D_I(DL_test, L_mb, I_mb, self.settings['seq_step'], tao)
        print('seq_length:', self.settings['seq_length'])
        print('D:Comb-logits-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}; FPR: {:.4}'
              .format(self.epoch, tao, Accu1, Pre1, Rec1, F11, FPR1))
        results[0, :] = [Accu1, Pre1, Rec1, F11, FPR1]
        # 2) discriminator probabilities only
        Accu2, Pre2, Rec2, F12, FPR2, D_L2 = DR_discriminator.detection_D_I(D_test, L_mb, I_mb, self.settings['seq_step'], tao)
        print('seq_length:', self.settings['seq_length'])
        print('D:Comb-statistic-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}; FPR: {:.4}'
              .format(self.epoch, tao, Accu2, Pre2, Rec2, F12, FPR2))
        results[1, :] = [Accu2, Pre2, Rec2, F12, FPR2]
        # 3) reconstruction error combined with logits
        Accu3, Pre3, Rec3, F13, FPR3, D_L3 = DR_discriminator.detection_R_D_I(DL_test, GG, T_samples, L_mb, self.settings['seq_step'], tao, lam)
        print('seq_length:', self.settings['seq_length'])
        print('RD:Comb-logits_based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}; FPR: {:.4}'
              .format(self.epoch, tao, Accu3, Pre3, Rec3, F13, FPR3))
        results[2, :] = [Accu3, Pre3, Rec3, F13, FPR3]
        # 4) reconstruction error combined with probabilities
        Accu4, Pre4, Rec4, F14, FPR4, D_L4 = DR_discriminator.detection_R_D_I(D_test, GG, T_samples, L_mb, self.settings['seq_step'], tao, lam)
        print('seq_length:', self.settings['seq_length'])
        print('RD:Comb-statistic-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}; FPR: {:.4}'
              .format(self.epoch, tao, Accu4, Pre4, Rec4, F14, FPR4))
        results[3, :] = [Accu4, Pre4, Rec4, F14, FPR4]
        # 5) reconstruction error only
        Accu5, Pre5, Rec5, F15, FPR5, D_L5 = DR_discriminator.detection_R_I(GG, T_samples, L_mb, self.settings['seq_step'],tao)
        print('seq_length:', self.settings['seq_length'])
        print('G:Comb-sample-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}; FPR: {:.4}'
              .format(self.epoch, tao, Accu5, Pre5, Rec5, F15, FPR5))
        results[4, :] = [Accu5, Pre5, Rec5, F15, FPR5]
        return results, GG, D_test, DL_test
if __name__ == "__main__":
    print('Main Starting...')
    # One (5, 5) result table per epoch: rows are the five detection
    # strategies, columns are [Accu, Pre, Rec, F1, FPR].
    Results = np.empty([settings['num_epochs'], 5, 5])
    t_size = 500  # must match the sample count drawn inside myADclass.ADfunc
    D_test = np.empty([settings['num_epochs'], t_size, settings['seq_length'], 1])
    DL_test = np.empty([settings['num_epochs'], t_size, settings['seq_length'], 1])
    GG = np.empty([settings['num_epochs'], t_size, settings['seq_length'], settings['num_signals']])
    for epoch in range(settings['num_epochs']):
        # for epoch in range(1):
        ob = myADclass(epoch)
        Results[epoch, :, :], GG[epoch, :, :, :], D_test[epoch, :, :, :], DL_test[epoch, :, :, :] = ob.ADfunc()
    res_path = './experiments/plots/Results_Invert' + '_' + settings['sub_id'] + '_' + str(
        settings['seq_length']) + '.npy'
    np.save(res_path, Results)
    dg_path = './experiments/plots/DG_Invert' + '_' + settings['sub_id'] + '_' + str(
        settings['seq_length']) + '_'
    np.save(dg_path + 'D_test.npy', D_test)
    np.save(dg_path + 'DL_test.npy', DL_test)
    # BUG FIX: GG.npy previously saved DL_test (copy-paste error); save the
    # reconstructed sequences GG instead.
    np.save(dg_path + 'GG.npy', GG)
    print('Main Terminating...')
================================================
FILE: DR_discriminator.py
================================================
import json

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from matplotlib.colors import hsv_to_rgb
from sklearn.metrics import precision_recall_fscore_support

import mmd
import model
from mod_core_rnn_cell_impl import LSTMCell
def anomaly_detection_plot(D_test, T_mb, L_mb, D_L, epoch, identifier):
    """Plot a 4x4 grid of test windows (discriminator probability, input
    data, ground-truth labels, predicted labels) and save the figure under
    ./experiments/plots/DR_dis/.

    Args:
        D_test: (n, seq_length, 1) discriminator scores, n >= 16.
        T_mb: matching input windows.
        L_mb: matching ground-truth labels.
        D_L: predicted labels, reshapeable to (n, seq_length, -1).
        epoch: epoch number, used for the title and file name.
        identifier: experiment tag, used for the file name.

    Returns:
        True on completion (figure written as a side effect).
    """
    aa = D_test.shape[0]
    bb = D_test.shape[1]
    D_L = D_L.reshape([aa, bb, -1])
    x_points = np.arange(bb)
    fig, ax = plt.subplots(4, 4, sharex=True)
    for m in range(4):
        for n in range(4):
            D = D_test[n * 4 + m, :, :]
            T = T_mb[n * 4 + m, :, :]
            L = L_mb[n * 4 + m, :, :]
            DL = D_L[n * 4 + m, :, :]
            ax[m, n].plot(x_points, D, '--g', label='Pro')
            ax[m, n].plot(x_points, T, 'b', label='Data')
            ax[m, n].plot(x_points, L, 'k', label='Label')
            # NOTE(review): duplicate legend label 'Label' for the predicted
            # curve -- presumably meant to be distinct; confirm intent.
            ax[m, n].plot(x_points, DL, 'r', label='Label')
            ax[m, n].set_ylim(-1, 1)
    for n in range(4):
        ax[-1, n].xaxis.set_ticks(range(0, bb, int(bb/6)))
    fig.suptitle(epoch)
    fig.subplots_adjust(hspace=0.15)
    fig.savefig("./experiments/plots/DR_dis/" + identifier + "_epoch" + str(epoch).zfill(4) + ".png")
    plt.clf()
    plt.close()
    return True
def detection_Comb(Label_test, L_mb, I_mb, seq_step, tao):
    """Point-wise anomaly detection from per-window discriminator scores.

    Overlapping windows (stride `seq_step`) are averaged back onto the
    original time axis, thresholded at `tao`, and compared against the
    averaged ground-truth labels.

    Args:
        Label_test: (aa, bb, 1) discriminator scores per window point.
        L_mb: (aa, bb, 1) ground-truth labels per window point.
        I_mb: (aa, bb, 1) time indices (unused; kept for interface parity).
        seq_step: stride between consecutive windows.
        tao: decision threshold; averaged score <= tao is flagged anomalous.

    Returns:
        (Accu, precision, recall, f1): accuracy in percent plus binary
        precision/recall/F1 from sklearn.
    """
    aa = Label_test.shape[0]
    bb = Label_test.shape[1]
    LL = (aa - 1) * seq_step + bb  # length of the reconstructed series
    Label_test = abs(Label_test.reshape([aa, bb]))
    L_mb = L_mb.reshape([aa, bb])
    I_mb = I_mb.reshape([aa, bb])
    D_L = np.zeros([LL, 1])
    L_L = np.zeros([LL, 1])
    Count = np.zeros([LL, 1])
    # Scatter-add every window point onto its absolute position, counting
    # how many windows cover each position so we can average afterwards.
    for i in range(0, aa):
        for j in range(0, bb):
            D_L[i * seq_step + j] += Label_test[i, j]
            L_L[i * seq_step + j] += L_mb[i, j]
            Count[i * seq_step + j] += 1
    D_L /= Count
    L_L /= Count
    # High score means "looks real": positions at or below tao are flagged.
    for i in range(LL):
        if D_L[i] > tao:
            D_L[i] = 0  # normal
        else:
            D_L[i] = 1  # anomalous
    cc = (D_L == L_L)
    cc = list(cc.reshape([-1]))
    N = cc.count(True)
    print('N:', N)
    Accu = float((N / LL) * 100)
    precision, recall, f1, _ = precision_recall_fscore_support(L_L, D_L, average='binary')
    return Accu, precision, recall, f1
def detection_logits_I(DL_test, L_mb, I_mb, seq_step, tao):
    """Threshold averaged per-point discriminator logits and score them
    against the averaged ground-truth labels.

    Returns:
        (Accu, precision, recall, f1, FPR, D_L): accuracy in percent,
        sklearn binary metrics, false-alarm rate in percent, and the
        per-point binary predictions.
    """
    win_count = DL_test.shape[0]
    win_len = DL_test.shape[1]
    series_len = (win_count - 1) * seq_step + win_len
    scores = abs(DL_test.reshape([win_count, win_len]))
    truth = L_mb.reshape([win_count, win_len])
    I_mb = I_mb.reshape([win_count, win_len])
    D_L = np.zeros([series_len, 1])
    L_L = np.zeros([series_len, 1])
    Count = np.zeros([series_len, 1])
    # Average overlapping windows back onto the original time axis.
    for w in range(win_count):
        for p in range(win_len):
            pos = w * seq_step + p
            D_L[pos] += scores[w, p]
            L_L[pos] += truth[w, p]
            Count[pos] += 1
    D_L /= Count
    L_L /= Count
    TP, TN, FP, FN = 0, 0, 0, 0
    for pos in range(series_len):
        # score above tao -> looks real -> predict normal (0)
        D_L[pos] = 0 if D_L[pos] > tao else 1
        pred = D_L[pos]
        actual = L_L[pos]
        if pred == 1 and actual == 1:
            TP += 1
        elif pred == 1 and actual == 0:
            FP += 1
        elif pred == 0 and actual == 0:
            TN += 1
        elif pred == 0 and actual == 1:
            FN += 1
    matches = list((D_L == L_L).reshape([-1]))
    N = matches.count(True)
    print('N:', N)
    Accu = float((N / series_len) * 100)
    precision, recall, f1, _ = precision_recall_fscore_support(L_L, D_L, average='binary')
    # False-alarm rate; the +1 keeps the denominator non-zero.
    FPR = (100 * FP) / (FP + TN + 1)
    return Accu, precision, recall, f1, FPR, D_L
def detection_statistic_I(D_test, L_mb, I_mb, seq_step, tao):
    # point-wise detection for one dimension
    """Point-wise detection from the discriminator's probability outputs.

    Overlapping windows (stride `seq_step`) are averaged back onto the
    original time axis; averaged score > tao -> normal (0), else
    anomalous (1).

    Returns:
        (Accu, precision, recall, f1, FPR, D_L): accuracy in percent,
        sklearn binary metrics, false-positive rate in percent, and the
        per-point binary predictions.
    """
    aa = D_test.shape[0]   # number of windows
    bb = D_test.shape[1]   # window length
    LL = (aa-1) * seq_step + bb   # reconstructed series length
    # print('aa:', aa)
    # print('bb:', bb)
    # print('LL:', LL)
    D_test = D_test.reshape([aa, bb])
    L_mb = L_mb.reshape([aa, bb])
    I_mb = I_mb.reshape([aa, bb])   # unused below; kept for interface parity
    D_L = np.zeros([LL, 1])
    L_L = np.zeros([LL, 1])
    Count = np.zeros([LL, 1])
    # scatter-add window points onto absolute positions, then average
    for i in range(0, aa):
        for j in range(0, bb):
            D_L[i * seq_step + j] += D_test[i, j]
            L_L[i * seq_step + j] += L_mb[i, j]
            Count[i * seq_step + j] += 1
    D_L /= Count
    L_L /= Count
    TP, TN, FP, FN = 0, 0, 0, 0
    for i in range(LL):
        if D_L[i] > tao:
            # high score -> looks real -> normal
            D_L[i] = 0
        else:
            # low score -> anomalous
            D_L[i] = 1
        A = D_L[i]
        B = L_L[i]
        if A == 1 and B == 1:
            TP += 1
        elif A == 1 and B == 0:
            FP += 1
        elif A == 0 and B == 0:
            TN += 1
        elif A == 0 and B == 1:
            FN += 1
    cc = (D_L == L_L)
    cc = list(cc.reshape([-1]))
    N = cc.count(True)
    Accu = float((N / LL) * 100)
    precision, recall, f1, _ = precision_recall_fscore_support(L_L, D_L, average='binary')
    # False positive rate--false alarm rate
    # NOTE(review): unlike the sibling functions there is no +1 guard here,
    # so FP + TN == 0 would raise ZeroDivisionError.
    FPR = (100 * FP) / (FP + TN)
    return Accu, precision, recall, f1, FPR, D_L
def detection_D_I(DD, L_mb, I_mb, seq_step, tao):
    """Point-wise detection from discriminator outputs, with manually
    computed (+1-smoothed) precision/recall/F1.

    Overlapping windows (stride `seq_step`) are averaged back onto the
    original time axis; averaged score > tao -> normal (0), else
    anomalous (1).

    Args:
        DD: (aa, bb, 1) discriminator outputs (probabilities or logits).
        L_mb: (aa, bb, 1) ground-truth labels.
        I_mb: (aa, bb, 1) time indices (unused; kept for interface parity).
        seq_step: stride between consecutive windows.
        tao: decision threshold.

    Returns:
        (Accu, Pre, Rec, F1, FPR, D_L): percent-scale metrics (the
        denominators carry a +1 smoothing term) and the per-point binary
        predictions.
    """
    aa = DD.shape[0]
    bb = DD.shape[1]
    LL = (aa - 1) * seq_step + bb
    DD = abs(DD.reshape([aa, bb]))
    L_mb = L_mb.reshape([aa, bb])
    I_mb = I_mb.reshape([aa, bb])
    D_L = np.zeros([LL, 1])
    L_L = np.zeros([LL, 1])
    Count = np.zeros([LL, 1])
    for i in range(0, aa):
        for j in range(0, bb):
            # BUG FIX: offsets were hard-coded as `i * 10 + j`, which only
            # matches LL when seq_step == 10 and indexes out of bounds (or
            # mis-averages) for any other stride.
            D_L[i * seq_step + j] += DD[i, j]
            L_L[i * seq_step + j] += L_mb[i, j]
            Count[i * seq_step + j] += 1
    D_L /= Count
    L_L /= Count
    TP, TN, FP, FN = 0, 0, 0, 0
    for i in range(LL):
        if D_L[i] > tao:
            # high score -> looks real -> normal
            D_L[i] = 0
        else:
            # low score -> anomalous
            D_L[i] = 1
        A = D_L[i]
        B = L_L[i]
        if A == 1 and B == 1:
            TP += 1
        elif A == 1 and B == 0:
            FP += 1
        elif A == 0 and B == 0:
            TN += 1
        elif A == 0 and B == 1:
            FN += 1
    cc = (D_L == L_L)
    cc = list(cc.reshape([-1]))
    N = cc.count(True)
    print('N:', N)
    Accu = float((N / LL) * 100)
    # true positive among all the detected positive
    Pre = (100 * TP) / (TP + FP + 1)
    # true positive among all the real positive
    Rec = (100 * TP) / (TP + FN + 1)
    # harmonic mean of precision and recall (percent scale, +1 smoothing)
    F1 = (2 * Pre * Rec) / (100 * (Pre + Rec + 1))
    # False positive rate--false alarm rate
    FPR = (100 * FP) / (FP + TN + 1)
    return Accu, Pre, Rec, F1, FPR, D_L
def detection_R_D_I(DD, Gs, T_mb, L_mb, seq_step, tao, lam):
    """Point-wise detection combining reconstruction error and
    discriminator score: flag when (1-lam)*R + lam*(1-D) > tao.

    Args:
        DD: (aa, bb, 1) discriminator outputs (probabilities or logits).
        Gs: reconstructed windows, (aa, bb, num_signals).
        T_mb: real windows, same shape as Gs.
        L_mb: (aa, bb, 1) ground-truth labels.
        seq_step: stride between consecutive windows.
        tao: decision threshold on the combined score.
        lam: weight of the discriminator term.

    Returns:
        (Accu, Pre, Rec, F1, FPR, L_pre): percent-scale metrics (the
        denominators carry a +1 smoothing term) and the per-point binary
        predictions.
    """
    # per-point reconstruction error, averaged over signals
    R = np.absolute(Gs - T_mb)
    R = np.mean(R, axis=2)
    aa = DD.shape[0]
    bb = DD.shape[1]
    LL = (aa - 1) * seq_step + bb
    DD = abs(DD.reshape([aa, bb]))
    DD = 1 - DD  # turn a "looks real" score into an anomaly score
    L_mb = L_mb.reshape([aa, bb])
    R = R.reshape([aa, bb])
    D_L = np.zeros([LL, 1])
    R_L = np.zeros([LL, 1])
    L_L = np.zeros([LL, 1])
    L_pre = np.zeros([LL, 1])
    Count = np.zeros([LL, 1])
    for i in range(0, aa):
        for j in range(0, bb):
            # BUG FIX: offsets were hard-coded as `i * 10 + j`, which is only
            # correct when seq_step == 10 and indexes out of bounds (or
            # mis-averages) for any other stride.
            D_L[i * seq_step + j] += DD[i, j]
            L_L[i * seq_step + j] += L_mb[i, j]
            R_L[i * seq_step + j] += R[i, j]
            Count[i * seq_step + j] += 1
    D_L /= Count
    L_L /= Count
    R_L /= Count
    TP, TN, FP, FN = 0, 0, 0, 0
    for i in range(LL):
        if (1 - lam) * R_L[i] + lam * D_L[i] > tao:
            # combined score too high -> anomalous
            L_pre[i] = 1
        else:
            L_pre[i] = 0
        A = L_pre[i]
        B = L_L[i]
        if A == 1 and B == 1:
            TP += 1
        elif A == 1 and B == 0:
            FP += 1
        elif A == 0 and B == 0:
            TN += 1
        elif A == 0 and B == 1:
            FN += 1
    cc = (L_pre == L_L)
    cc = list(cc.reshape([-1]))
    N = cc.count(True)
    # BUG FIX: accuracy used to divide by aa*bb, but N is counted over the
    # LL reconstructed time points.
    Accu = float((N / LL) * 100)
    # true positive among all the detected positive
    Pre = (100 * TP) / (TP + FP + 1)
    # true positive among all the real positive
    Rec = (100 * TP) / (TP + FN + 1)
    # harmonic mean of precision and recall (percent scale, +1 smoothing)
    F1 = (2 * Pre * Rec) / (100 * (Pre + Rec + 1))
    # False positive rate
    FPR = (100 * FP) / (FP + TN + 1)
    return Accu, Pre, Rec, F1, FPR, L_pre
def detection_R_I(Gs, T_mb, L_mb, seq_step, tao):
    """Point-wise detection from reconstruction error alone: flag a time
    point when its averaged reconstruction error exceeds `tao`.

    Args:
        Gs: reconstructed windows, (aa, bb, num_signals).
        T_mb: real windows, same shape as Gs.
        L_mb: (aa, bb, 1) ground-truth labels.
        seq_step: stride between consecutive windows.
        tao: decision threshold on the averaged error.

    Returns:
        (Accu, Pre, Rec, F1, FPR, L_pre): percent-scale metrics (the
        denominators carry a +1 smoothing term) and the per-point binary
        predictions.
    """
    # per-point reconstruction error, averaged over signals
    R = np.absolute(Gs - T_mb)
    R = np.mean(R, axis=2)
    aa = R.shape[0]
    bb = R.shape[1]
    LL = (aa - 1) * seq_step + bb
    L_mb = L_mb.reshape([aa, bb])
    R = R.reshape([aa, bb])
    L_L = np.zeros([LL, 1])
    R_L = np.zeros([LL, 1])
    L_pre = np.zeros([LL, 1])
    Count = np.zeros([LL, 1])
    for i in range(0, aa):
        for j in range(0, bb):
            # BUG FIX: offsets were hard-coded as `i * 10 + j`, which is only
            # correct when seq_step == 10 and indexes out of bounds (or
            # mis-averages) for any other stride.
            L_L[i * seq_step + j] += L_mb[i, j]
            R_L[i * seq_step + j] += R[i, j]
            Count[i * seq_step + j] += 1
    L_L /= Count
    R_L /= Count
    TP, TN, FP, FN = 0, 0, 0, 0
    for i in range(LL):
        if R_L[i] > tao:
            # large reconstruction error -> anomalous
            L_pre[i] = 1
        else:
            L_pre[i] = 0
        A = L_pre[i]
        B = L_L[i]
        if A == 1 and B == 1:
            TP += 1
        elif A == 1 and B == 0:
            FP += 1
        elif A == 0 and B == 0:
            TN += 1
        elif A == 0 and B == 1:
            FN += 1
    cc = (L_pre == L_L)
    cc = list(cc.reshape([-1]))
    N = cc.count(True)
    # BUG FIX: accuracy used to divide by aa*bb, but N is counted over the
    # LL reconstructed time points.
    Accu = float((N / LL) * 100)
    # true positive among all the detected positive
    Pre = (100 * TP) / (TP + FP + 1)
    # true positive among all the real positive
    Rec = (100 * TP) / (TP + FN + 1)
    # harmonic mean of precision and recall (percent scale, +1 smoothing)
    F1 = (2 * Pre * Rec) / (100 * (Pre + Rec + 1))
    # False positive rate
    FPR = (100 * FP) / (FP + TN + 1)
    return Accu, Pre, Rec, F1, FPR, L_pre
def sample_detection(D_test, L_mb, tao):
    """Window-level detection: a whole window is flagged anomalous when its
    mean discriminator score is at or below `tao`.

    Returns:
        (Accu, precision, recall, f1): accuracy in percent plus binary
        precision/recall/F1 from sklearn.
    """
    n_windows = D_test.shape[0]
    win_len = D_test.shape[1]
    scores = D_test.reshape([n_windows, win_len])
    truth = L_mb.reshape([n_windows, win_len])
    # a window counts as anomalous if it contains any anomalous point
    L = np.sum(truth, 1)
    L[L > 0] = 1
    D_L = np.empty([n_windows, ])
    for w in range(n_windows):
        # high mean score -> looks real -> normal (0); else anomalous (1)
        D_L[w] = 0 if np.mean(scores[w, :]) > tao else 1
    N = list(D_L == L).count(True)
    Accu = float((N / (n_windows)) * 100)
    precision, recall, f1, _ = precision_recall_fscore_support(L, D_L, average='binary')
    return Accu, precision, recall, f1
def CUSUM_det(spe_n, spe_a, labels):
    """Two-sided CUSUM change detection on SPE statistics.

    Args:
        spe_n: SPE values on normal data; fixes the reference std (sigma).
        spe_a: SPE values of the sequence under test.
        labels: binary ground truth, one entry per point of spe_a.

    Returns:
        (Accu, Pre, Rec, F1, FPR) on the percent scale used elsewhere in
        this module.
    """
    mu = np.mean(spe_n)
    sigma = np.std(spe_n)
    kk = 3 * sigma   # allowance (slack) per step
    H = 15 * sigma   # decision band: alarm outside [-H, H]
    print('H:', H)
    tar = np.mean(spe_a)
    mm = spe_a.shape[0]
    SH = np.empty([mm, ])
    SL = np.empty([mm, ])
    # BUG FIX: the recursion seeds SH[-1] = SL[-1] = 0 used to be
    # re-assigned inside the loop on every iteration; they are read only
    # at i == 0, so seed them once before the loop.
    SH[-1] = 0
    SL[-1] = 0
    for i in range(mm):
        # upper/lower cumulative sums of deviations beyond the allowance
        SH[i] = max(0, SH[i-1] + spe_a[i] - (tar + kk))
        SL[i] = min(0, SL[i-1] + spe_a[i] - (tar - kk))
    count = np.empty([mm, ])
    TP, TN, FP, FN = 0, 0, 0, 0
    for i in range(mm):
        # alarm when either cumulative sum leaves the +/-H band
        if SH[i] <= H and SL[i] >= -H:
            count[i] = 0
        else:
            count[i] = 1
        C = count[i]
        D = labels[i]
        if C == 1 and D == 1:
            TP += 1
        elif C == 1 and D == 0:
            FP += 1
        elif C == 0 and D == 0:
            TN += 1
        elif C == 0 and D == 1:
            FN += 1
    N = list(count == labels).count(True)
    Accu = float((N / (mm)) * 100)
    # true positive among all the detected positive (+1 smoothing)
    Pre = (100 * TP) / (TP + FP + 1)
    # true positive among all the real positive
    # NOTE(review): unlike the siblings this denominator has no +1 guard,
    # so TP + FN == 0 raises ZeroDivisionError; kept for behavior parity.
    Rec = (100 * TP) / (TP + FN)
    # harmonic mean of precision and recall (percent scale)
    F1 = (2 * Pre * Rec) / (100 * (Pre + Rec + 1))
    # False positive rate (no +1 guard here either)
    FPR = (100 * FP) / (FP + TN)
    return Accu, Pre, Rec, F1, FPR
def SPE(X, pc):
    """Square Prediction Error of each row of X against the principal
    components pc: spe_i = x_i' (I - P'P) x_i.

    Args:
        X: (n, d) data matrix, one observation per row.
        pc: (k, d) matrix of principal-component rows.

    Returns:
        (n,) array of SPE values (squared residual distances).
    """
    n_obs = X.shape[0]
    dim = X.shape[1]
    spe = np.empty([n_obs])
    # residual projector: I - P'P
    residual = np.identity(dim, float) - np.matmul(pc.transpose(1, 0), pc)
    for k in range(n_obs):
        row = X[k, :].reshape([dim, 1])
        left = np.matmul(row.transpose(1, 0), residual)
        spe[k] = np.matmul(left, row)
    return spe
def generator_o(z, hidden_units_g, seq_length, batch_size, num_generated_features, reuse=False, parameters=None, learn_scale=True):
    """Rebuild the trained generator: an LSTM over the latent sequence `z`
    followed by a tanh output projection.

    Every variable is initialised from the `parameters` dict saved at
    training time (keyed by original variable name), so `parameters` must
    not be None here.

    Args:
        z: latent input; presumably (batch_size, seq_length, latent_dim) -- TODO confirm.
        hidden_units_g: LSTM state size.
        seq_length: time steps per sequence.
        batch_size: sequences per batch.
        num_generated_features: output dimensionality per time step.
        reuse: variable-reuse flag passed to the LSTM cell.
        parameters: dict of trained weights.
        learn_scale: only asserted when the checkpoint has no output scale.

    Returns:
        (batch, seq_length, num_generated_features) tensor in [-1, 1].
    """
    # It is important to specify different variable scopes for the LSTM cells.
    with tf.variable_scope("generator_o") as scope:
        W_out_G_initializer = tf.constant_initializer(value=parameters['generator/W_out_G:0'])
        b_out_G_initializer = tf.constant_initializer(value=parameters['generator/b_out_G:0'])
        try:
            scale_out_G_initializer = tf.constant_initializer(value=parameters['generator/scale_out_G:0'])
        except KeyError:
            # checkpoint has no saved output scale; fall back to 1
            scale_out_G_initializer = tf.constant_initializer(value=1)
            assert learn_scale
        lstm_initializer = tf.constant_initializer(value=parameters['generator/rnn/lstm_cell/weights:0'])
        bias_start = parameters['generator/rnn/lstm_cell/biases:0']
        W_out_G = tf.get_variable(name='W_out_G', shape=[hidden_units_g, num_generated_features], initializer=W_out_G_initializer)
        b_out_G = tf.get_variable(name='b_out_G', shape=num_generated_features, initializer=b_out_G_initializer)
        # scale is restored but frozen (trainable=False)
        scale_out_G = tf.get_variable(name='scale_out_G', shape=1, initializer=scale_out_G_initializer, trainable=False)
        inputs = z
        # modified LSTMCell allows initialising the recurrent bias
        cell = LSTMCell(num_units=hidden_units_g,
                        state_is_tuple=True,
                        initializer=lstm_initializer,
                        bias_start=bias_start,
                        reuse=reuse)
        rnn_outputs, rnn_states = tf.nn.dynamic_rnn(
            cell=cell,
            dtype=tf.float32,
            sequence_length=[seq_length] * batch_size,
            inputs=inputs)
        rnn_outputs_2d = tf.reshape(rnn_outputs, [-1, hidden_units_g])
        logits_2d = tf.matmul(rnn_outputs_2d, W_out_G) + b_out_G  # output weighted sum
        output_2d = tf.nn.tanh(logits_2d)  # squash to [-1, 1]
        output_3d = tf.reshape(output_2d, [-1, seq_length, num_generated_features])
        return output_3d
def discriminator_o(x, hidden_units_d, reuse=False, parameters=None):
    """Rebuild the discriminator graph with the saved output projection.

    Args:
        x: input minibatch; presumably (batch, seq_length, num_signals) -- TODO confirm.
        hidden_units_d: LSTM state size.
        reuse: variable-reuse flag passed to the LSTM cell.
        parameters: dict of trained weights keyed by original variable name.

    Returns:
        (output, logits): per-time-step sigmoid scores in [0, 1] and the
        raw pre-activation logits.
    """
    # NOTE(review): scope name is "discriminator_0" (zero), not "_o" --
    # harmless since the restored weights come from constant initializers.
    with tf.variable_scope("discriminator_0") as scope:
        W_out_D_initializer = tf.constant_initializer(value=parameters['discriminator/W_out_D:0'])
        b_out_D_initializer = tf.constant_initializer(value=parameters['discriminator/b_out_D:0'])
        W_out_D = tf.get_variable(name='W_out_D', shape=[hidden_units_d, 1], initializer=W_out_D_initializer)
        b_out_D = tf.get_variable(name='b_out_D', shape=1, initializer=b_out_D_initializer)
        inputs = x
        # NOTE(review): unlike generator_o, the LSTM cell gets no saved
        # initializer here, so its recurrent weights are NOT restored --
        # only the output projection is; confirm this is intended.
        cell = tf.contrib.rnn.LSTMCell(num_units=hidden_units_d, state_is_tuple=True, reuse=reuse)
        rnn_outputs, rnn_states = tf.nn.dynamic_rnn(cell=cell, dtype=tf.float32, inputs=inputs)
        logits = tf.einsum('ijk,km', rnn_outputs, W_out_D) + b_out_D  # output weighted sum
        output = tf.nn.sigmoid(logits)  # sigmoid activation in [0, 1]
        return output, logits
def invert(settings, samples, para_path, g_tolerance=None, e_tolerance=0.1,
           n_iter=None, max_iter=10000, heuristic_sigma=None):
    """Find the latent point Z whose generated sequence best matches `samples`.

    Gradient descent is run on Z only (the generator weights are loaded
    from `para_path` and frozen), minimising an MMD-based reconstruction
    error. Designed for ONE sample (batch dimension of 1).

    Args:
        settings: settings dict, or an identifier string naming one to load.
        samples: one real sequence, (seq_length, num_generated_features).
        para_path: .npy file holding the trained generator parameters.
        g_tolerance: if set, iterate until the gradient norm drops below it.
        e_tolerance: otherwise iterate until |error| drops below this.
        n_iter: if set, run exactly this many iterations instead.
        max_iter: hard cap on iterations for the tolerance-based loops.
        heuristic_sigma: RBF kernel bandwidth; when None, the median
            pairwise distance of `samples` is used (noisy estimate).

    Returns:
        (Gs, Zs, error_per_sample, heuristic_sigma): the generated match,
        the latent point found, the per-sample error, and the bandwidth.
    """
    samples = np.float32(samples)
    # if settings is a string, assume it's an identifier and load the dict
    if type(settings) == str:
        settings = json.load(open('./experiments/settings/' + settings + '.txt', 'r'))
    parameters = model.load_parameters(para_path)
    # Z is a trainable VARIABLE: the optimisation target
    Z = tf.get_variable(name='Z', shape=[1, settings['seq_length'],
                                         settings['latent_dim']],
                        initializer=tf.random_normal_initializer())
    G_samples = generator_o(Z, settings['hidden_units_g'], settings['seq_length'],
                            1, settings['num_generated_features'],
                            reuse=False, parameters=parameters)
    fd = None
    # MMD-based loss between the generated and the real sequence
    if heuristic_sigma is None:
        heuristic_sigma = mmd.median_pairwise_distance_o(samples)  # this is noisy
        print('heuristic_sigma:', heuristic_sigma)
    samples = tf.reshape(samples, [1, settings['seq_length'], settings['num_generated_features']])
    Kxx, Kxy, Kyy, wts = mmd._mix_rbf_kernel(G_samples, samples, sigmas=tf.constant(value=heuristic_sigma, shape=(1, 1)))
    similarity_per_sample = tf.diag_part(Kxy)
    reconstruction_error_per_sample = 1 - similarity_per_sample
    similarity = tf.reduce_mean(similarity_per_sample)
    reconstruction_error = 1 - similarity
    # optimise only Z; the generator weights stay fixed
    solver = tf.train.RMSPropOptimizer(learning_rate=0.1).minimize(reconstruction_error_per_sample, var_list=[Z])
    grad_Z = tf.gradients(reconstruction_error_per_sample, Z)[0]
    grad_per_Z = tf.norm(grad_Z, axis=(1, 2))
    grad_norm = tf.reduce_mean(grad_per_Z)
    print('Finding latent state corresponding to samples...')
    # BUG FIX: a second tf.Session used to be created (and leaked) right
    # before this managed one; the single `with` session is sufficient.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        error = sess.run(reconstruction_error, feed_dict=fd)
        g_n = sess.run(grad_norm, feed_dict=fd)
        i = 0
        if not n_iter is None:
            # run a fixed number of updates
            while i < n_iter:
                _ = sess.run(solver, feed_dict=fd)
                error = sess.run(reconstruction_error, feed_dict=fd)
                i += 1
        else:
            if not g_tolerance is None:
                # iterate until the gradient norm is small enough
                while g_n > g_tolerance:
                    _ = sess.run(solver, feed_dict=fd)
                    error, g_n = sess.run([reconstruction_error, grad_norm], feed_dict=fd)
                    i += 1
                    print(error, g_n)
                    if i > max_iter:
                        break
            else:
                # iterate until the reconstruction error is small enough
                while np.abs(error) > e_tolerance:
                    _ = sess.run(solver, feed_dict=fd)
                    error = sess.run(reconstruction_error, feed_dict=fd)
                    i += 1
                    if i > max_iter:
                        break
        Zs = sess.run(Z, feed_dict=fd)
        Gs = sess.run(G_samples, feed_dict={Z: Zs})
        error_per_sample = sess.run(reconstruction_error_per_sample, feed_dict=fd)
        print('Z found in', i, 'iterations with final reconstruction error of', error)
    # clear the graph so repeated calls don't accumulate variables
    tf.reset_default_graph()
    return Gs, Zs, error_per_sample, heuristic_sigma
def dis_trained_model(settings, samples, para_path):
    """
    Discriminate a batch of test sequences with a trained MAD-GAN discriminator.

    settings : dict of experiment settings, or a string identifier naming a
               settings file under ./experiments/settings/
    samples  : array of shape (num_samples, seq_length, num_variables)
    para_path: path to the saved model parameters

    Returns the discriminator outputs and logits (D_t, L_t) for the batch.
    """
    if type(settings) == str:
        # treat the string as an experiment identifier and load its settings
        settings = json.load(open('./experiments/settings/' + settings + '.txt', 'r'))
    batch = np.float32(samples)
    n_test = batch.shape[0]
    n_vars = batch.shape[2]
    # restore the trained discriminator weights
    parameters = model.load_parameters(para_path)
    # placeholder sized to hold the entire test batch at once
    T = tf.placeholder(tf.float32, [n_test, settings['seq_length'], n_vars])
    # rebuild the (plain GAN) discriminator graph with the loaded weights
    D_t, L_t = discriminator_o(T, settings['hidden_units_d'], reuse=False, parameters=parameters)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # don't grab all GPU memory up front
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        D_t, L_t = sess.run([D_t, L_t], feed_dict={T: batch})
    tf.reset_default_graph()  # leave a clean graph for the next call
    return D_t, L_t
def dis_D_model(settings, samples, para_path):
    """
    Discriminate a SINGLE test sequence with a trained MAD-GAN discriminator.

    settings : dict of experiment settings, or a string identifier naming a
               settings file under ./experiments/settings/
    samples  : one sequence, reshaped to (1, seq_length, num_generated_features)
    para_path: path to the saved model parameters

    Returns the discriminator output and logits (D_t, L_t) for the sequence.

    Fix: the ConfigProto with gpu_options.allow_growth was created but never
    passed to tf.Session(), so it had no effect; it is now passed, consistent
    with dis_trained_model.
    """
    # if settings is a string, assume it's an identifier and load
    if type(settings) == str:
        settings = json.load(open('./experiments/settings/' + settings + '.txt', 'r'))
    samples = np.float32(samples)
    samples = np.reshape(samples, [1, settings['seq_length'], settings['num_generated_features']])
    # get the trained parameters
    parameters = model.load_parameters(para_path)
    # create placeholder for the single sequence
    T = tf.placeholder(tf.float32, [1, settings['seq_length'], settings['num_generated_features']])
    # rebuild the (plain GAN) discriminator graph with the loaded weights
    D_t, L_t = discriminator_o(T, settings['hidden_units_d'], reuse=False, parameters=parameters)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # don't grab all GPU memory up front
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        D_t, L_t = sess.run([D_t, L_t], feed_dict={T: samples})
    tf.reset_default_graph()  # leave a clean graph for the next call
    return D_t, L_t
================================================
FILE: README.md
================================================
# -- Multivariate Anomaly Detection for Time Series Data with GANs -- #
# MAD-GAN
This repository contains code for the paper, _[MAD-GAN: Multivariate Anomaly Detection for Time Series Data with Generative Adversarial Networks](https://arxiv.org/pdf/1901.04997.pdf)_, by Dan Li, Dacheng Chen, Jonathan Goh, and See-Kiong Ng.
MAD-GAN is a refined version of GAN-AD at _[Anomaly Detection with Generative Adversarial Networks for Multivariate Time Series](https://arxiv.org/pdf/1809.04758.pdf)_ The code can be found at https://github.com/LiDan456/GAN-AD
(We are still working on this topic, will upload the completed version later...)
## Overview
We used generative adversarial networks (GANs) to do anomaly detection for time series data.
The GAN framework was **R**GAN, which was taken from the paper, _[Real-valued (Medical) Time Series Generation with Recurrent Conditional GANs](https://arxiv.org/abs/1706.02633)_.
Please refer to https://github.com/ratschlab/RGAN for the original code.
## Quickstart
- Python3
- Please unpack the data.7z file in the data folder before running RGAN.py and AD.py
- To train the model:
""" python RGAN.py --settings_file kdd99 """
- To do anomaly detection:
""" python AD.py --settings_file kdd99_test"""
""" python AD_Invert.py --settings_file kdd99_test"""
## Data
We applied our method to the SWaT and WADI datasets in the paper; however, we didn't upload the data in this repository. Please refer to https://itrust.sutd.edu.sg/ and send a request to iTrust if you want to try the data.
In this repository we used the KDD Cup 1999 dataset as an example (please unpack the data.7z file in the data folder before running RGAN.py and AD.py). You can also download the original data at http://kdd.ics.uci.edu/databases/kddcup99/kddcup99.html
================================================
FILE: RGAN.py
================================================
import numpy as np
import tensorflow as tf
import pdb
import random
import json
from scipy.stats import mode
import data_utils
import plotting
import model
import utils
import eval
import DR_discriminator
from time import time
from math import floor
from mmd import rbf_mmd2, median_pairwise_distance, mix_rbf_mmd2_and_ratio
begin = time()  # wall-clock start; total training time is reported at the end
tf.logging.set_verbosity(tf.logging.ERROR)
# --- get settings --- #
# parse command line arguments, or use defaults
parser = utils.rgan_options_parser()
settings = vars(parser.parse_args())
# if a settings file is specified, it overrides command line arguments/defaults
if settings['settings_file']: settings = utils.load_settings_from_file(settings)
# --- get data, split --- #
# samples, pdf, labels = data_utils.get_data(settings)
data_path = './experiments/data/' + settings['data_load_from'] + '.data.npy'
print('Loading data from', data_path)
# training never evaluates anomalies / single samples, so force these flags off
settings["eval_an"] = False
settings["eval_single"] = False
samples, labels, index = data_utils.get_data(settings["data"], settings["seq_length"], settings["seq_step"],
                                             settings["num_signals"], settings['sub_id'], settings["eval_single"],
                                             settings["eval_an"], data_path)
print('samples_size:',samples.shape)
# -- number of variables -- #
num_variables = samples.shape[2]
print('num_variables:', num_variables)
# --- save settings, data --- #
print('Ready to run with settings:')
for (k, v) in settings.items(): print(v, '\t', k)
# add the settings to local environment
# WARNING: at this point a lot of variables appear
# (identifier, batch_size, num_epochs, sub_id, ... are all injected here)
locals().update(settings)
json.dump(settings, open('./experiments/settings/' + identifier + '.txt', 'w'), indent=0)
# --- build model --- #
# preparation: data placeholders and model parameters
Z, X, T = model.create_placeholders(batch_size, seq_length, latent_dim, num_variables)
# pick out the subsets of settings each sub-network needs
discriminator_vars = ['hidden_units_d', 'seq_length', 'batch_size', 'batch_mean']
discriminator_settings = dict((k, settings[k]) for k in discriminator_vars)
generator_vars = ['hidden_units_g', 'seq_length', 'batch_size', 'learn_scale']
generator_settings = dict((k, settings[k]) for k in generator_vars)
generator_settings['num_signals'] = num_variables
# model: GAN losses
D_loss, G_loss = model.GAN_loss(Z, X, generator_settings, discriminator_settings)
# optimizers (priv_accountant is only meaningful when dp=True)
D_solver, G_solver, priv_accountant = model.GAN_solvers(D_loss, G_loss, learning_rate, batch_size,
                                                        total_examples=samples.shape[0],
                                                        l2norm_bound=l2norm_bound,
                                                        batches_per_lot=batches_per_lot, sigma=dp_sigma, dp=dp)
# model: generate samples for visualization (reuses generator weights)
G_sample = model.generator(Z, **generator_settings, reuse=True)
# # --- evaluation settings--- #
#
# # frequency to do visualisations
# num_samples = samples.shape[0]
# vis_freq = max(6600 // num_samples, 1)
# eval_freq = max(6600// num_samples, 1)
#
# # get heuristic bandwidth for mmd kernel from evaluation samples
# heuristic_sigma_training = median_pairwise_distance(samples)
# best_mmd2_so_far = 1000
#
# # optimise sigma using that (that's t-hat)
# batch_multiplier = 5000 // batch_size
# eval_size = batch_multiplier * batch_size
# eval_eval_size = int(0.2 * eval_size)
# eval_real_PH = tf.placeholder(tf.float32, [eval_eval_size, seq_length, num_generated_features])
# eval_sample_PH = tf.placeholder(tf.float32, [eval_eval_size, seq_length, num_generated_features])
# n_sigmas = 2
# sigma = tf.get_variable(name='sigma', shape=n_sigmas, initializer=tf.constant_initializer(
#     value=np.power(heuristic_sigma_training, np.linspace(-1, 3, num=n_sigmas))))
# mmd2, that = mix_rbf_mmd2_and_ratio(eval_real_PH, eval_sample_PH, sigma)
# with tf.variable_scope("SIGMA_optimizer"):
#     sigma_solver = tf.train.RMSPropOptimizer(learning_rate=0.05).minimize(-that, var_list=[sigma])
#     # sigma_solver = tf.train.AdamOptimizer().minimize(-that, var_list=[sigma])
#     # sigma_solver = tf.train.AdagradOptimizer(learning_rate=0.1).minimize(-that, var_list=[sigma])
# sigma_opt_iter = 2000
# sigma_opt_thresh = 0.001
# sigma_opt_vars = [var for var in tf.global_variables() if 'SIGMA_optimizer' in var.name]
# --- run the program --- #
config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # don't pre-allocate all GPU memory
sess = tf.Session(config=config)
# sess = tf.Session()
sess.run(tf.global_variables_initializer())
# # -- plot the real samples -- #
# 16 randomly chosen real sequences, saved for visual comparison with samples
vis_real_indices = np.random.choice(len(samples), size=16)
vis_real = np.float32(samples[vis_real_indices, :, :])
plotting.save_plot_sample(vis_real, 0, identifier + '_real', n_samples=16, num_epochs=num_epochs)
plotting.save_samples_real(vis_real, identifier)
# --- train --- #
train_vars = ['batch_size', 'D_rounds', 'G_rounds', 'use_time', 'seq_length', 'latent_dim']
train_settings = dict((k, settings[k]) for k in train_vars)
train_settings['num_signals'] = num_variables
t0 = time()
MMD = np.zeros([num_epochs, ])  # per-epoch MMD scores (only filled when eval is enabled)
for epoch in range(num_epochs):
    # for epoch in range(1):
    # -- train epoch -- #
    D_loss_curr, G_loss_curr = model.train_epoch(epoch, samples, labels, sess, Z, X, D_loss, G_loss,
                                                 D_solver, G_solver, **train_settings)
    # # -- eval -- #
    # # visualise plots of generated samples, with/without labels
    # # choose which epoch to visualize
    #
    # # random input vectors for the latent space, as the inputs of generator
    # vis_ZZ = model.sample_Z(batch_size, seq_length, latent_dim, use_time)
    #
    # # # -- generate samples-- #
    # vis_sample = sess.run(G_sample, feed_dict={Z: vis_ZZ})
    # # # -- visualize the generated samples -- #
    # plotting.save_plot_sample(vis_sample, epoch, identifier, n_samples=16, num_epochs=None, ncol=4)
    # # plotting.save_plot_sample(vis_sample, 0, identifier + '_real', n_samples=16, num_epochs=num_epochs)
    # # # save the generated samples in cased they might be useful for comparison
    # plotting.save_samples(vis_sample, identifier, epoch)
    # -- print -- #
    print('epoch, D_loss_curr, G_loss_curr, seq_length')
    print('%d\t%.4f\t%.4f\t%d' % (epoch, D_loss_curr, G_loss_curr, seq_length))
    # # -- compute mmd2 and if available, prob density -- #
    # if epoch % eval_freq == 0:
    #     # how many samples to evaluate with?
    #     eval_Z = model.sample_Z(eval_size, seq_length, latent_dim, use_time)
    #     eval_sample = np.empty(shape=(eval_size, seq_length, num_signals))
    #     for i in range(batch_multiplier):
    #         eval_sample[i * batch_size:(i + 1) * batch_size, :, :] = sess.run(G_sample, feed_dict={ Z: eval_Z[i * batch_size:(i + 1) * batch_size]})
    #     eval_sample = np.float32(eval_sample)
    #     eval_real = np.float32(samples['vali'][np.random.choice(len(samples['vali']), size=batch_multiplier * batch_size), :, :])
    #
    #     eval_eval_real = eval_real[:eval_eval_size]
    #     eval_test_real = eval_real[eval_eval_size:]
    #     eval_eval_sample = eval_sample[:eval_eval_size]
    #     eval_test_sample = eval_sample[eval_eval_size:]
    #
    #     # MMD
    #     # reset ADAM variables
    #     sess.run(tf.initialize_variables(sigma_opt_vars))
    #     sigma_iter = 0
    #     that_change = sigma_opt_thresh * 2
    #     old_that = 0
    #     while that_change > sigma_opt_thresh and sigma_iter < sigma_opt_iter:
    #         new_sigma, that_np, _ = sess.run([sigma, that, sigma_solver],
    #                                          feed_dict={eval_real_PH: eval_eval_real, eval_sample_PH: eval_eval_sample})
    #         that_change = np.abs(that_np - old_that)
    #         old_that = that_np
    #         sigma_iter += 1
    #     opt_sigma = sess.run(sigma)
    #     try:
    #         mmd2, that_np = sess.run(mix_rbf_mmd2_and_ratio(eval_test_real, eval_test_sample, biased=False, sigmas=sigma))
    #     except ValueError:
    #         mmd2 = 'NA'
    #         that = 'NA'
    #
    #     MMD[epoch, ] = mmd2
    # -- save model parameters (checkpointed every epoch) -- #
    model.dump_parameters(sub_id + '_' + str(seq_length) + '_' + str(epoch), sess)
np.save('./experiments/plots/gs/' + identifier + '_' + 'MMD.npy', MMD)
end = time() - begin
print('Training terminated | Training time=%d s' %(end) )
# NOTE(review): this second message reports (almost) the same elapsed time as
# the print above -- looks like a leftover duplicate; confirm before removing
print("Training terminated | training time = %ds " % (time() - begin))
================================================
FILE: data_utils.py
================================================
import numpy as np
import pandas as pd
import pdb
import re
from time import time
import json
import random
import model
from scipy.spatial.distance import pdist, squareform
from scipy.stats import multivariate_normal, invgamma, mode
from scipy.special import gamma
from scipy.misc.pilutil import imresize
from functools import partial
from math import ceil
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
# --- deal with the SWaT data --- #
def swat(seq_length, seq_step, num_signals, randomize=False):
    """
    Load the SWaT training data, rescale each feature column to [-1, 1],
    PCA-project down to num_signals dimensions, and cut into sliding windows.

    Returns (samples, labels) of shapes (num_windows, seq_length, num_signals)
    and (num_windows, seq_length, 1).
    """
    train = np.loadtxt(open('./data/swat.csv'), delimiter=',')
    print('Loaded swat from .csv')
    m, n = train.shape  # m=496800, n=52
    # per-column max normalisation, then rescale into [-1, 1];
    # all-zero columns are left untouched
    for col in range(n - 1):
        peak = max(train[:, col])
        if peak != 0:
            train[:, col] /= peak
            train[:, col] = 2 * train[:, col] - 1
    feats = train[21600:, 0:n - 1]   # skip the first 21600 rows (start-up)
    labels = train[21600:, n - 1]    # the last column is the label
    # -- PCA dimension reduction for multi-variate GAN-AD (pc=5 found best) -- #
    from sklearn.decomposition import PCA
    pca = PCA(num_signals, svd_solver='full')
    pca.fit(feats)
    ex_var = pca.explained_variance_ratio_  # kept for inspection/debugging
    # projected values on the principal components
    projected = np.matmul(feats, pca.components_.T)
    # -- sliding windows of length seq_length, advancing by seq_step -- #
    num_windows = (projected.shape[0] - seq_length) // seq_step
    print("num_samples:", num_windows)
    print("num_signals:", num_signals)
    aa = np.empty([num_windows, seq_length, num_signals])
    bb = np.empty([num_windows, seq_length, 1])
    for j in range(num_windows):
        lo = j * seq_step
        hi = lo + seq_length
        bb[j, :, :] = labels[lo:hi].reshape(-1, 1)
        aa[j, :, :] = projected[lo:hi, 0:num_signals]
    return aa, bb
def swat_birgan(seq_length, seq_step, num_signals, randomize=False):
    """
    Load the SWaT training data and serialise it into per-window covariance
    matrices for the bi-RGAN variant.

    Returns (samples, labels): (num_windows, nn, nn) covariance matrices and
    (num_windows, seq_length, 1) label windows, where nn = number of features.

    Bug fix: the progress display called model_bigan.display_batch_progression,
    but no module named `model_bigan` is imported anywhere in this file, so the
    loop raised NameError on its first iteration. Replaced with a plain
    periodic progress print.
    """
    train = np.loadtxt(open('./data/swat.csv'), delimiter=',')
    print('Loaded swat from .csv')
    m, n = train.shape  # m=496800, n=52
    # per-column max normalisation, then rescale into [-1, 1]
    for i in range(n - 1):
        A = max(train[:, i])
        if A != 0:
            train[:, i] /= A
            train[:, i] = 2 * train[:, i] - 1
    samples = train[21600:, 0:n - 1]  # skip the first 21600 rows (start-up)
    labels = train[21600:, n - 1]     # the last column is the label
    nn = samples.shape[1]
    num_samples = (samples.shape[0] - seq_length) // seq_step
    aa = np.empty([num_samples, nn, nn])
    bb = np.empty([num_samples, seq_length, 1])
    print('Pre-process training data...')
    for j in range(num_samples):
        if j % 1000 == 0:
            # lightweight progress display (model_bigan was undefined here)
            print('window %d / %d' % (j, num_samples))
        start = j * seq_step
        bb[j, :, :] = np.reshape(labels[start:start + seq_length], [-1, 1])
        # covariance of the window across the nn variables
        aa[j, :, :] = np.cov(samples[start:start + seq_length, :].T)
    return aa, bb
def swat_test(seq_length, seq_step, num_signals, randomize=False):
    """
    Load the SWaT attack (test) data, rescale each feature column to [-1, 1],
    PCA-project down to num_signals dimensions, and cut into sliding windows.

    Returns (samples, labels, index): windowed projections, windowed labels,
    and the original row index of every point in every window.
    """
    test = np.loadtxt(open('./data/swat_a.csv'), delimiter=',')
    print('Loaded swat_a from .csv')
    m, n = test.shape  # m1=449919, n1=52
    # per-column max normalisation, then rescale into [-1, 1]
    for col in range(n - 1):
        peak = max(test[:, col])
        if peak != 0:
            test[:, col] /= peak
            test[:, col] = 2 * test[:, col] - 1
    feats = test[:, 0:n - 1]
    labels = test[:, n - 1]
    idx = np.asarray(list(range(0, m)))  # original position of each row
    # -- PCA dimension reduction (pc=5 found best) -- #
    from sklearn.decomposition import PCA
    import DR_discriminator as dr  # kept from the original (unused here)
    pca_a = PCA(num_signals, svd_solver='full')
    pca_a.fit(feats)
    # projected values on the principal components
    projected = np.matmul(feats, pca_a.components_.T)
    # -- sliding windows -- #
    num_windows = (projected.shape[0] - seq_length) // seq_step
    aa = np.empty([num_windows, seq_length, num_signals])
    bb = np.empty([num_windows, seq_length, 1])
    bbb = np.empty([num_windows, seq_length, 1])
    for j in range(num_windows):
        lo = j * seq_step
        hi = lo + seq_length
        bb[j, :, :] = labels[lo:hi].reshape(-1, 1)
        bbb[j, :, :] = idx[lo:hi].reshape(-1, 1)
        aa[j, :, :] = projected[lo:hi, 0:num_signals]
    return aa, bb, bbb
def swat_birgan_test(seq_length, seq_step, num_signals, randomize=False):
    """
    Load the SWaT attack (test) data and serialise it into per-window
    covariance matrices for the bi-RGAN variant.

    Returns (samples, labels): (num_windows, nn, nn) covariance matrices and
    (num_windows, seq_length, 1) label windows, where nn = number of features.

    Bug fix: the progress display called model_bigan.display_batch_progression,
    but no module named `model_bigan` is imported anywhere in this file, so the
    loop raised NameError on its first iteration. Replaced with a plain
    periodic progress print.
    """
    test = np.loadtxt(open('./data/swat_a.csv'), delimiter=',')
    print('Loaded swat_a from .csv')
    m, n = test.shape  # m1=449919, n1=52
    # per-column max normalisation, then rescale into [-1, 1]
    for i in range(n - 1):
        B = max(test[:, i])
        if B != 0:
            test[:, i] /= B
            test[:, i] = 2 * test[:, i] - 1
    samples = test[:, 0:n - 1]
    labels = test[:, n - 1]  # the last column is the label
    nn = samples.shape[1]
    num_samples = (samples.shape[0] - seq_length) // seq_step
    aa = np.empty([num_samples, nn, nn])
    bb = np.empty([num_samples, seq_length, 1])
    print('Pre-process testing data...')
    for j in range(num_samples):
        if j % 1000 == 0:
            # lightweight progress display (model_bigan was undefined here)
            print('window %d / %d' % (j, num_samples))
        start = j * seq_step
        bb[j, :, :] = np.reshape(labels[start:start + seq_length], [-1, 1])
        # covariance of the window across the nn variables
        aa[j, :, :] = np.cov(samples[start:start + seq_length, :].T)
    return aa, bb
def wadi(seq_length, seq_step, num_signals, randomize=False):
    """
    Load the WADI training data, rescale feature columns to [-1, 1], keep a
    hand-picked subset of four sensors, and serialise into down-sampled
    sliding windows.

    NOTE: seq_length is overridden to 10800 inside this function, and every
    window is then down-sampled by a factor of 300 (10800 -> 36 steps).
    Returns (samples, labels).
    """
    train = np.load('./data/wadi.npy')
    print('Loaded wadi from .npy')
    m, n = train.shape  # m=1048571, n=119
    # per-column max normalisation, then rescale into [-1, 1]
    for col in range(n - 1):
        peak = max(train[:, col])
        if peak != 0:
            train[:, col] /= peak
            train[:, col] = 2 * train[:, col] - 1
    feats = train[259200:, 0:n - 1]  # normal (training) portion only
    labels = train[259200:, n - 1]
    # hand-picked sensor subset
    feats = feats[:, [0, 3, 6, 17]]
    seq_length = 10800  # hard-coded window length, overrides the argument
    num_windows = (feats.shape[0] - seq_length) // seq_step
    print("num_samples:", num_windows)
    print("num_signals:", num_signals)
    aa = np.empty([num_windows, seq_length, num_signals])
    bb = np.empty([num_windows, seq_length, 1])
    for j in range(num_windows):
        lo = j * seq_step
        hi = lo + seq_length
        bb[j, :, :] = labels[lo:hi].reshape(-1, 1)
        aa[j, :, :] = feats[lo:hi, 0:num_signals]
    # down-sample each window: every 300th point of the 10800-long window
    return aa[:, 0:10800:300, :], bb[:, 0:10800:300, :]
def wadi_test(seq_length, seq_step, num_signals, randomize=False):
    """
    Load the WADI attack (test) data, rescale feature columns to [-1, 1],
    PCA-project down to num_signals dimensions, and cut into sliding windows.

    Returns (samples, labels, index): windowed projections, windowed labels,
    and the original row index of every point in every window.

    Bug fix: the label window started at `j * 10` instead of `j * seq_step`,
    so for any seq_step != 10 the label slice had the wrong offset and length
    (every sibling *_test loader uses j * seq_step). Also dropped the unused
    local `import DR_discriminator as dr`.
    """
    test = np.load('./data/wadi_a.npy')
    print('Loaded wadi_a from .npy')
    m, n = test.shape  # m1=172801, n1=119
    # per-column max normalisation, then rescale into [-1, 1]
    for i in range(n - 1):
        B = max(test[:, i])
        if B != 0:
            test[:, i] /= B
            test[:, i] = 2 * test[:, i] - 1
    samples = test[:, 0:n - 1]
    labels = test[:, n - 1]
    idx = np.asarray(list(range(0, m)))  # original position of each row
    # -- PCA dimension reduction (pc=8 found best for WADI) -- #
    from sklearn.decomposition import PCA
    pca_a = PCA(num_signals, svd_solver='full')
    pca_a.fit(samples)
    # projected values on the principal components
    samples = np.matmul(samples, pca_a.components_.T)
    # -- sliding windows -- #
    num_samples_t = (samples.shape[0] - seq_length) // seq_step
    aa = np.empty([num_samples_t, seq_length, num_signals])
    bb = np.empty([num_samples_t, seq_length, 1])
    bbb = np.empty([num_samples_t, seq_length, 1])
    for j in range(num_samples_t):
        lo = j * seq_step  # was labels[(j * 10):...] -- off-window labels
        hi = lo + seq_length
        bb[j, :, :] = np.reshape(labels[lo:hi], [-1, 1])
        bbb[j, :, :] = np.reshape(idx[lo:hi], [-1, 1])
        aa[j, :, :] = samples[lo:hi, 0:num_signals]
    return aa, bb, bbb
def kdd99(seq_length, seq_step, num_signals):
    """
    Load the KDD99 training data, rescale each feature column to [-1, 1],
    PCA-project down to num_signals dimensions, and cut into sliding windows.

    Returns (samples, labels) of shapes (num_windows, seq_length, num_signals)
    and (num_windows, seq_length, 1).
    """
    train = np.load('./data/kdd99_train.npy')
    print('load kdd99_train from .npy')
    m, n = train.shape  # m=562387, n=35
    # per-column max normalisation, then rescale into [-1, 1];
    # all-zero columns are left untouched
    for col in range(n - 1):
        peak = max(train[:, col])
        if peak != 0:
            train[:, col] /= peak
            train[:, col] = 2 * train[:, col] - 1
    feats = train[:, 0:n - 1]
    labels = train[:, n - 1]  # the last column is the label
    # -- PCA dimension reduction (pc=6 found best) -- #
    from sklearn.decomposition import PCA
    pca = PCA(num_signals, svd_solver='full')
    pca.fit(feats)
    ex_var = pca.explained_variance_ratio_  # kept for inspection/debugging
    # projected values on the principal components
    projected = np.matmul(feats, pca.components_.T)
    # -- sliding windows of length seq_length, advancing by seq_step -- #
    num_windows = (projected.shape[0] - seq_length) // seq_step
    aa = np.empty([num_windows, seq_length, num_signals])
    bb = np.empty([num_windows, seq_length, 1])
    for j in range(num_windows):
        lo = j * seq_step
        hi = lo + seq_length
        bb[j, :, :] = labels[lo:hi].reshape(-1, 1)
        aa[j, :, :] = projected[lo:hi, 0:num_signals]
    return aa, bb
def kdd99_test(seq_length, seq_step, num_signals):
    """
    Load the KDD99 test data, rescale each feature column to [-1, 1],
    PCA-project down to num_signals dimensions, and cut into sliding windows.

    Returns (samples, labels, index): windowed projections, windowed labels,
    and the original row index of every point in every window.
    """
    test = np.load('./data/kdd99_test.npy')
    print('load kdd99_test from .npy')
    m, n = test.shape  # m1=494021, n1=35
    # per-column max normalisation, then rescale into [-1, 1]
    for col in range(n - 1):
        peak = max(test[:, col])
        if peak != 0:
            test[:, col] /= peak
            test[:, col] = 2 * test[:, col] - 1
    feats = test[:, 0:n - 1]
    labels = test[:, n - 1]
    idx = np.asarray(list(range(0, m)))  # original position of each row
    # -- PCA dimension reduction (pc=6 found best) -- #
    from sklearn.decomposition import PCA
    import DR_discriminator as dr  # kept from the original (unused here)
    pca_a = PCA(num_signals, svd_solver='full')
    pca_a.fit(feats)
    # projected values on the principal components
    projected = np.matmul(feats, pca_a.components_.T)
    # -- sliding windows -- #
    num_windows = (projected.shape[0] - seq_length) // seq_step
    aa = np.empty([num_windows, seq_length, num_signals])
    bb = np.empty([num_windows, seq_length, 1])
    bbb = np.empty([num_windows, seq_length, 1])
    for j in range(num_windows):
        lo = j * seq_step
        hi = lo + seq_length
        bb[j, :, :] = labels[lo:hi].reshape(-1, 1)
        bbb[j, :, :] = idx[lo:hi].reshape(-1, 1)
        aa[j, :, :] = projected[lo:hi, 0:num_signals]
    return aa, bb, bbb
# ############################ data pre-processing #################################
# --- to do with loading --- #
# --- to do with loading --- #
def get_samples_and_labels(settings):
    """
    Parse settings options to load or generate correct type of data,
    perform test/train split as necessary, and reform into 'samples' and 'labels'
    dictionaries (keys 'train', 'vali', 'test').

    Also mutates `settings` in place (cond_dim, max_val, seq_length,
    num_samples, num_signals) to match the actual data.

    Returns (samples, pdf, labels).
    """
    if settings['data_load_from']:
        data_path = './experiments/data/' + settings['data_load_from'] + '.data.npy'
        print('Loading data from', data_path)
        # NOTE(review): get_data below is defined with 8 parameters; this
        # 2-argument call does not match that signature -- confirm which
        # version of get_data this path was written against
        samples, pdf, labels = get_data('load', data_path)
        train, vali, test = samples['train'], samples['vali'], samples['test']
        train_labels, vali_labels, test_labels = labels['train'], labels['vali'], labels['test']
        del samples, labels
    else:
        # generate the data
        data_vars = ['num_samples', 'num_samples_t','seq_length', 'seq_step', 'num_signals', 'freq_low',
                     'freq_high', 'amplitude_low', 'amplitude_high', 'scale', 'full_mnist']
        # NOTE(review): data_settings is built but never used below
        data_settings = dict((k, settings[k]) for k in data_vars if k in settings.keys())
        samples, pdf, labels = get_data(settings['data'], settings['seq_length'], settings['seq_step'], settings['num_signals'], settings['sub_id'])
        if 'multivariate_mnist' in settings and settings['multivariate_mnist']:
            # reshape flat MNIST rows into square images
            seq_length = samples.shape[1]
            samples = samples.reshape(-1, int(np.sqrt(seq_length)), int(np.sqrt(seq_length)))
        if 'normalise' in settings and settings['normalise']:  # TODO this is a mess, fix
            print(settings['normalise'])
            norm = True
        else:
            norm = False
        if labels is None:
            train, vali, test = split(samples, [0.6, 0.2, 0.2], normalise=norm)
            train_labels, vali_labels, test_labels = None, None, None
        else:
            train, vali, test, labels_list = split(samples, [0.6, 0.2, 0.2], normalise=norm, labels=labels)
            train_labels, vali_labels, test_labels = labels_list
    # repackage the three splits into dictionaries
    labels = dict()
    labels['train'], labels['vali'], labels['test'] = train_labels, vali_labels, test_labels
    samples = dict()
    samples['train'], samples['vali'], samples['test'] = train, vali, test
    # futz around with labels
    # TODO refactor cause this is messy
    if 'one_hot' in settings and settings['one_hot'] and not settings['data_load_from']:
        if len(labels['train'].shape) == 1:
            # ASSUME labels go from 0 to max_val inclusive, find max-val
            max_val = int(np.max([labels['train'].max(), labels['test'].max(), labels['vali'].max()]))
            # now we have max_val + 1 dimensions
            print('Setting cond_dim to', max_val + 1, 'from', settings['cond_dim'])
            settings['cond_dim'] = max_val + 1
            print('Setting max_val to 1 from', settings['max_val'])
            settings['max_val'] = 1
            # one-hot encode every split the same way
            labels_oh = dict()
            for (k, v) in labels.items():
                A = np.zeros(shape=(len(v), settings['cond_dim']))
                A[np.arange(len(v)), (v).astype(int)] = 1
                labels_oh[k] = A
            labels = labels_oh
        else:
            assert settings['max_val'] == 1
            # this is already one-hot!
    if 'predict_labels' in settings and settings['predict_labels']:
        # NOTE(review): `data_utils.` is a self-reference -- this module does
        # not import itself, and make_predict_labels is not defined in this
        # file; this branch would raise NameError as written -- confirm
        samples, labels = data_utils.make_predict_labels(samples, labels)
        print('Setting cond_dim to 0 from', settings['cond_dim'])
        settings['cond_dim'] = 0
    # update the settings dictionary to update erroneous settings
    # (mostly about the sequence length etc. - it gets set by the data!)
    settings['seq_length'] = samples['train'].shape[1]
    settings['num_samples'] = samples['train'].shape[0] + samples['vali'].shape[0] + samples['test'].shape[0]
    settings['num_signals'] = samples['train'].shape[2]
    return samples, pdf, labels
def get_data(data_type, seq_length, seq_step, num_signals, sub_id, eval_single, eval_an, data_options=None):
    """
    Helper/wrapper function to get the requested data.

    data_type    : one of 'load', 'swat', 'swat_test', 'kdd99', 'kdd99_test',
                   'wadi', 'wadi_test'
    data_options : path of the pickled .npy dict to read when data_type == 'load'

    Returns (samples, labels, index); labels and/or index are None for
    loaders that do not produce them.

    Raises ValueError for an unrecognised data_type.

    Fix: the first print emitted the literal string 'data_type' instead of
    the actual value; it now prints the requested type.
    """
    print('data_type:', data_type)
    labels = None
    index = None
    if data_type == 'load':
        # NOTE(review): .item() implies a pickled dict; newer numpy requires
        # allow_pickle=True here -- confirm the numpy version in use
        data_dict = np.load(data_options).item()
        samples = data_dict['samples']
        pdf = data_dict['pdf']  # NOTE(review): read but never returned
        labels = data_dict['labels']
    elif data_type == 'swat':
        samples, labels = swat(seq_length, seq_step, num_signals)
    elif data_type == 'swat_test':
        samples, labels, index = swat_test(seq_length, seq_step, num_signals)
    elif data_type == 'kdd99':
        samples, labels = kdd99(seq_length, seq_step, num_signals)
    elif data_type == 'kdd99_test':
        samples, labels, index = kdd99_test(seq_length, seq_step, num_signals)
    elif data_type == 'wadi':
        samples, labels = wadi(seq_length, seq_step, num_signals)
    elif data_type == 'wadi_test':
        samples, labels, index = wadi_test(seq_length, seq_step, num_signals)
    else:
        raise ValueError(data_type)
    print('Generated/loaded', len(samples), 'samples from data-type', data_type)
    return samples, labels, index
def get_batch(samples, batch_size, batch_idx, labels=None):
    """
    Slice out the batch_idx-th batch of size batch_size.

    Returns (batch, None) when no labels are given, (batch, labels_batch)
    for a single np.ndarray of labels, and (batch, labels0_batch,
    labels1_batch) when `labels` is a 2-tuple of label arrays.
    """
    lo = batch_idx * batch_size
    hi = lo + batch_size
    batch = samples[lo:hi]
    if labels is None:
        return batch, None
    if type(labels) == tuple:  # two parallel sets of labels
        assert len(labels) == 2
        return batch, labels[0][lo:hi], labels[1][lo:hi]
    assert type(labels) == np.ndarray
    return batch, labels[lo:hi]
def split(samples, proportions, normalise=False, scale=False, labels=None, random_seed=None):
    """
    Return train/validation/test split.

    Args:
        samples: array split along axis 0.
        proportions: (train, vali, test) fractions summing to 1.
        normalise: normalise all splits with train statistics
            (mutually exclusive with scale).
        scale: scale all splits with train statistics.
        labels: optional ndarray split identically to samples, or a dict
            of name -> ndarray for several label sets.
        random_seed: optional seed for a reproducible shuffle.

    Returns:
        (train, vali, test) or (train, vali, test, labels_split).
    """
    if random_seed is not None:  # was `!= None`; identity test is the idiom
        random.seed(random_seed)
        np.random.seed(random_seed)
    # proportions are typically floats; an exact == 1 comparison is fragile
    assert np.isclose(np.sum(proportions), 1)
    n_total = samples.shape[0]
    n_train = ceil(n_total * proportions[0])
    n_test = ceil(n_total * proportions[2])
    n_vali = n_total - (n_train + n_test)
    # permutation to shuffle the samples
    shuff = np.random.permutation(n_total)
    train_indices = shuff[:n_train]
    vali_indices = shuff[n_train:(n_train + n_vali)]
    test_indices = shuff[(n_train + n_vali):]
    # sanity check: the three index sets must be pairwise disjoint
    assert len(set(train_indices).intersection(vali_indices)) == 0
    assert len(set(train_indices).intersection(test_indices)) == 0
    assert len(set(vali_indices).intersection(test_indices)) == 0
    # split up the samples
    train = samples[train_indices]
    vali = samples[vali_indices]
    test = samples[test_indices]
    # apply the same normalisation scheme to all parts of the split,
    # using statistics derived from the training part only
    if normalise:
        if scale: raise ValueError(normalise, scale)  # mutually exclusive
        train, vali, test = normalise_data(train, vali, test)
    elif scale:
        train, vali, test = scale_data(train, vali, test)
    if labels is None:
        return train, vali, test
    print('Splitting labels...')
    if type(labels) == np.ndarray:
        labels_split = [labels[train_indices],
                        labels[vali_indices],
                        labels[test_indices]]
    elif type(labels) == dict:
        # more than one set of labels! (weird case)
        labels_split = dict()
        for (label_name, label_set) in labels.items():
            labels_split[label_name] = [label_set[train_indices],
                                        label_set[vali_indices],
                                        label_set[test_indices]]
    else:
        raise ValueError(type(labels))
    return train, vali, test, labels_split
================================================
FILE: differential_privacy/dp_sgd/dp_optimizer/dp_optimizer.py
================================================
# (originally from https://github.com/tensorflow/models/tree/master/research/differential_privacy,
# possibly with some small edits by @corcra)
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Differentially private optimizers.
"""
from __future__ import division
import tensorflow as tf
from differential_privacy.dp_sgd.dp_optimizer import utils
#from differential_privacy.dp_sgd.per_example_gradients import per_example_gradients
import pdb
class DPGradientDescentOptimizer(tf.train.GradientDescentOptimizer):
  """Differentially private gradient descent optimizer.

  Wraps plain SGD: per-example gradients are clipped and noised by a
  sanitizer before being applied, and an accountant (inside the sanitizer)
  tracks the privacy budget spent.
  """

  def __init__(self, learning_rate, eps_delta, sanitizer,
               sigma=None, use_locking=False, name="DPGradientDescent",
               batches_per_lot=1):
    """Construct a differentially private gradient descent optimizer.

    The optimizer uses fixed privacy budget for each batch of training.

    Args:
      learning_rate: for GradientDescentOptimizer.
      eps_delta: EpsDelta pair for each epoch.
      sanitizer: for sanitizing the graident.
      sigma: noise sigma. If None, use eps_delta pair to compute sigma;
        otherwise use supplied sigma directly.
      use_locking: use locking.
      name: name for the object.
      batches_per_lot: Number of batches in a lot.
    """
    super(DPGradientDescentOptimizer, self).__init__(learning_rate,
                                                     use_locking, name)

    # Also, if needed, define the gradient accumulators
    self._batches_per_lot = batches_per_lot
    self._grad_accum_dict = {}
    if batches_per_lot > 1:
      # Counts batches within the current lot; the update is only applied
      # to the real variables once every batches_per_lot batches.
      self._batch_count = tf.Variable(1, dtype=tf.int32, trainable=False,
                                      name="batch_count")
      var_list = tf.trainable_variables()
      with tf.variable_scope("grad_acc_for"):
        for var in var_list:
          # One shadow accumulator per trainable variable, keyed by the
          # variable's (unique) name.
          v_grad_accum = tf.Variable(tf.zeros_like(var),
                                     trainable=False,
                                     name=utils.GetTensorOpName(var))
          self._grad_accum_dict[var.name] = v_grad_accum
    self._eps_delta = eps_delta
    self._sanitizer = sanitizer
    self._sigma = sigma

  def compute_sanitized_gradients(self, loss, var_list=None,
                                  add_noise=True):
    """Compute the sanitized gradients.

    Args:
      loss: the loss tensor; assumed to hold one entry per example
        (it is unstacked along axis 0) -- TODO confirm at call sites.
      var_list: the optional variables.
      add_noise: if true, then add noise. Always clip.
    Returns:
      a list of sanitized gradients, one per variable in var_list.
    Raises:
      TypeError: if var_list contains non-variable.
    """
    self._assert_valid_dtypes([loss])

    xs = [tf.convert_to_tensor(x) for x in var_list]
    # TODO check this change
    # Per-example gradients: differentiate each example's loss separately.
    # NOTE(review): this issues one tf.gradients call per example, which
    # grows the graph linearly with batch size.
    loss_list = tf.unstack(loss, axis=0)
    px_grads_byexample = [tf.gradients(l, xs) for l in loss_list]
    # Regroup so px_grads[v] is the list of per-example gradients for xs[v].
    px_grads = [[x[v] for x in px_grads_byexample] for v in range(len(xs))]
    #px_grads = tf.gradients(loss, xs)
    # add a dummy 0th dimension to reflect the fact that we have a batch size of 1...
    # px_grads = [tf.expand_dims(x, 0) for x in px_grads]
    # px_grads = per_example_gradients.PerExampleGradients(loss, xs)
    sanitized_grads = []
    for px_grad, v in zip(px_grads, var_list):
      tensor_name = utils.GetTensorOpName(v)
      # Clip each per-example gradient, sum, and (optionally) add Gaussian
      # noise; the sanitizer also records the privacy spending.
      sanitized_grad = self._sanitizer.sanitize(
          px_grad, self._eps_delta, sigma=self._sigma,
          tensor_name=tensor_name, add_noise=add_noise,
          num_examples=self._batches_per_lot * tf.slice(
              tf.shape(px_grad), [0], [1]))
      sanitized_grads.append(sanitized_grad)

    return sanitized_grads

  def minimize(self, loss, global_step=None, var_list=None,
               name=None):
    """Minimize using sanitized gradients.

    This gets a var_list which is the list of trainable variables.
    For each var in var_list, we defined a grad_accumulator variable
    during init. When batches_per_lot > 1, we accumulate the gradient
    update in those. At the end of each lot, we apply the update back to
    the variable. This has the effect that for each lot we compute
    gradients at the point at the beginning of the lot, and then apply one
    update at the end of the lot. In other words, semantically, we are doing
    SGD with one lot being the equivalent of one usual batch of size
    batch_size * batches_per_lot.
    This allows us to simulate larger batches than our memory size would permit.

    The lr and the num_steps are in the lot world.

    Args:
      loss: the loss tensor.
      global_step: the optional global step.
      var_list: the optional variables.
      name: the optional name.
    Returns:
      the operation that runs one step of DP gradient descent.
    """
    # First validate the var_list
    if var_list is None:
      var_list = tf.trainable_variables()
    for var in var_list:
      if not isinstance(var, tf.Variable):
        raise TypeError("Argument is not a variable.Variable: %s" % var)

    # Modification: apply gradient once every batches_per_lot many steps.
    # This may lead to smaller error
    if self._batches_per_lot == 1:
      # Simple case: every batch is a lot, apply sanitized grads directly.
      sanitized_grads = self.compute_sanitized_gradients(
          loss, var_list=var_list)
      grads_and_vars = list(zip(sanitized_grads, var_list))
      self._assert_valid_dtypes([v for g, v in grads_and_vars if g is not None])

      apply_grads = self.apply_gradients(grads_and_vars,
                                         global_step=global_step, name=name)
      return apply_grads

    # Condition for deciding whether to accumulate the gradient
    # or actually apply it.
    # we use a private self_batch_count to keep track of number of batches.
    # global step will count number of lots processed.
    update_cond = tf.equal(tf.constant(0),
                           tf.mod(self._batch_count,
                                  tf.constant(self._batches_per_lot)))

    # Things to do for batches other than last of the lot.
    # Add non-noisy clipped grads to shadow variables.
    def non_last_in_lot_op(loss, var_list):
      """Ops to do for a typical batch.

      For a batch that is not the last one in the lot, we simply compute the
      sanitized gradients and apply them to the grad_acc variables.

      Args:
        loss: loss function tensor
        var_list: list of variables
      Returns:
        A tensorflow op to do the updates to the gradient accumulators
      """
      # No noise here: noise is added exactly once per lot, on the last batch.
      sanitized_grads = self.compute_sanitized_gradients(
          loss, var_list=var_list, add_noise=False)

      update_ops_list = []
      for var, grad in zip(var_list, sanitized_grads):
        grad_acc_v = self._grad_accum_dict[var.name]
        update_ops_list.append(grad_acc_v.assign_add(grad))
      update_ops_list.append(self._batch_count.assign_add(1))
      return tf.group(*update_ops_list)

    # Things to do for last batch of a lot.
    # Add noisy clipped grads to accumulator.
    # Apply accumulated grads to vars.
    def last_in_lot_op(loss, var_list, global_step):
      """Ops to do for last batch in a lot.

      For the last batch in the lot, we first add the sanitized gradients to
      the gradient acc variables, and then apply these
      values over to the original variables (via an apply gradient)

      Args:
        loss: loss function tensor
        var_list: list of variables
        global_step: optional global step to be passed to apply_gradients
      Returns:
        A tensorflow op to push updates from shadow vars to real vars.
      """
      # We add noise in the last lot. This is why we need this code snippet
      # that looks almost identical to the non_last_op case here.
      sanitized_grads = self.compute_sanitized_gradients(
          loss, var_list=var_list, add_noise=True)

      normalized_grads = []
      for var, grad in zip(var_list, sanitized_grads):
        grad_acc_v = self._grad_accum_dict[var.name]
        # To handle the lr difference per lot vs per batch, we divide the
        # update by number of batches per lot.
        normalized_grad = tf.div(grad_acc_v.assign_add(grad),
                                 tf.to_float(self._batches_per_lot))
        normalized_grads.append(normalized_grad)

      # Control dependency forces the accumulator updates to run before
      # the (accumulated) gradients are applied to the real variables.
      with tf.control_dependencies(normalized_grads):
        grads_and_vars = list(zip(normalized_grads, var_list))
        self._assert_valid_dtypes(
            [v for g, v in grads_and_vars if g is not None])
        apply_san_grads = self.apply_gradients(grads_and_vars,
                                               global_step=global_step,
                                               name="apply_grads")

      # Now reset the accumulators to zero
      resets_list = []
      with tf.control_dependencies([apply_san_grads]):
        for _, acc in self._grad_accum_dict.items():
          reset = tf.assign(acc, tf.zeros_like(acc))
          resets_list.append(reset)
      resets_list.append(self._batch_count.assign_add(1))

      last_step_update = tf.group(*([apply_san_grads] + resets_list))
      return last_step_update

    # pylint: disable=g-long-lambda
    update_op = tf.cond(update_cond,
                        lambda: last_in_lot_op(
                            loss, var_list,
                            global_step),
                        lambda: non_last_in_lot_op(
                            loss, var_list))
    return tf.group(update_op)
================================================
FILE: differential_privacy/dp_sgd/dp_optimizer/sanitizer.py
================================================
# (originally from https://github.com/tensorflow/models/tree/master/research/differential_privacy,
# possibly with some small edits by @corcra)
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Defines Sanitizer class for sanitizing tensors.
A sanitizer first limits the sensitivity of a tensor and then adds noise
to the tensor. The parameters are determined by the privacy_spending and the
other parameters. It also uses an accountant to keep track of the privacy
spending.
"""
from __future__ import division
import collections
import tensorflow as tf
from differential_privacy.dp_sgd.dp_optimizer import utils
import pdb
ClipOption = collections.namedtuple("ClipOption",
["l2norm_bound", "clip"])
class AmortizedGaussianSanitizer(object):
  """Sanitizer with Gaussian noise and amoritzed privacy spending accounting.

  This sanitizes a tensor by first clipping the tensor, summing the tensor
  and then adding appropriate amount of noise. It also uses an amortized
  accountant to keep track of privacy spending.
  """

  def __init__(self, accountant, default_option):
    """Construct an AmortizedGaussianSanitizer.

    Args:
      accountant: the privacy accountant. Expect an amortized one.
      default_option: the default ClipOptoin.
    """
    self._accountant = accountant
    self._default_option = default_option
    # Per-tensor ClipOptions keyed by tensor name; set via set_option and
    # consulted in sanitize() when the caller passes no explicit option.
    self._options = {}

  def set_option(self, tensor_name, option):
    """Set options for an individual tensor.

    Args:
      tensor_name: the name of the tensor.
      option: clip option.
    """
    self._options[tensor_name] = option

  def sanitize(self, x, eps_delta, sigma=None,
               option=ClipOption(None, None), tensor_name=None,
               num_examples=None, add_noise=True):
    """Sanitize the given tensor.

    This santize a given tensor by first applying l2 norm clipping and then
    adding Gaussian noise. It calls the privacy accountant for updating the
    privacy spending.

    Args:
      x: the tensor to sanitize.
      eps_delta: a pair of eps, delta for (eps,delta)-DP. Use it to
        compute sigma if sigma is None.
      sigma: if sigma is not None, use sigma.
      option: a ClipOption which, if supplied, used for
        clipping and adding noise.
      tensor_name: the name of the tensor.
      num_examples: if None, use the number of "rows" of x.
      add_noise: if True, then add noise, else just clip.
    Returns:
      the sanitized tensor. (Note: the privacy-spending accumulation op is
      not returned; it is attached as a control dependency of the noisy sum,
      so it runs whenever the returned tensor is evaluated.)
    """
    if sigma is None:
      # pylint: disable=unpacking-non-sequence
      eps, delta = eps_delta
      with tf.control_dependencies(
          [tf.Assert(tf.greater(eps, 0),
                     ["eps needs to be greater than 0"]),
           tf.Assert(tf.greater(delta, 0),
                     ["delta needs to be greater than 0"])]):
        # The following formula is taken from
        #   Dwork and Roth, The Algorithmic Foundations of Differential
        #   Privacy, Appendix A.
        #   http://www.cis.upenn.edu/~aaroth/Papers/privacybook.pdf
        sigma = tf.sqrt(2.0 * tf.log(1.25 / delta)) / eps

    l2norm_bound, clip = option
    if l2norm_bound is None:
      # No explicit option: fall back to the default, then to any
      # per-tensor override registered via set_option.
      l2norm_bound, clip = self._default_option
      if ((tensor_name is not None) and
          (tensor_name in self._options)):
        l2norm_bound, clip = self._options[tensor_name]
    if clip:
      x = utils.BatchClipByL2norm(x, l2norm_bound)

    if add_noise:
      if num_examples is None:
        num_examples = tf.slice(tf.shape(x), [0], [1])
      # Record the privacy spending; the control dependency guarantees the
      # accountant update actually runs before the noisy sum is produced.
      privacy_accum_op = self._accountant.accumulate_privacy_spending(
          eps_delta, sigma, num_examples)
      with tf.control_dependencies([privacy_accum_op]):
        # Noise stddev is sigma * l2norm_bound since the clipped per-example
        # contributions have sensitivity l2norm_bound.
        saned_x = utils.AddGaussianNoise(tf.reduce_sum(x, 0),
                                         sigma * l2norm_bound)
    else:
      saned_x = tf.reduce_sum(x, 0)
    return saned_x
================================================
FILE: differential_privacy/dp_sgd/dp_optimizer/utils.py
================================================
# (originally from https://github.com/tensorflow/models/tree/master/research/differential_privacy,
# possibly with some small edits by @corcra)
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utils for building and training NN models.
"""
from __future__ import division
import math
import numpy
import tensorflow as tf
class LayerParameters(object):
  """Defines a single non-convolutional (fully-connected) layer.

  Consumed by BuildNetwork, which reads every attribute defined here.
  """

  def __init__(self):
    self.name = ""
    self.num_units = 0
    # Bug fix: this was `self._with_bias`, but BuildNetwork reads
    # `layer_parameters.with_bias`, which raised AttributeError.
    self.with_bias = False
    self.relu = False
    self.gradient_l2norm_bound = 0.0
    self.bias_gradient_l2norm_bound = 0.0
    self.trainable = True
    self.weight_decay = 0.0
class ConvParameters(object):
  """Defines a single convolutional layer (conv + optional bias/relu/pool)."""

  def __init__(self):
    # Identification and geometry.
    self.name = ""
    self.in_size = 28
    self.patch_size = 5
    self.stride = 1
    self.in_channels = 1
    self.out_channels = 0
    self.num_outputs = 0
    # Bias and activation.
    self.with_bias = True
    self.bias_stddev = 0.1
    self.relu = True
    # Max-pooling configuration.
    self.max_pool = True
    self.max_pool_size = 2
    self.max_pool_stride = 2
    # Whether the conv weights are updated during training.
    self.trainable = False
# Parameters for a layered neural network.
class NetworkParameters(object):
  """Describes the overall model structure: input, projection, layers."""

  def __init__(self):
    self.input_size = 0
    # Input projection mode: one of 'NONE', 'RANDOM', 'PCA'.
    self.projection_type = 'NONE'
    self.projection_dimensions = 0
    self.default_gradient_l2norm_bound = 0.0
    # Fully-connected layers (LayerParameters) applied after any convs.
    self.layer_parameters = []
    # Convolutional layers (ConvParameters) applied first, in order.
    self.conv_parameters = []
def GetTensorOpName(x):
  """Get the name of the op that created a tensor.

  Useful for naming related tensors, as ':' in name field of op is not
  permitted: strips the trailing ':<output-index>' if present.

  Args:
    x: the input tensor.
  Returns:
    the name of the op.
  """
  op_name, colon, _ = x.name.rpartition(":")
  return op_name if colon else x.name
def BuildNetwork(inputs, network_parameters):
  """Build a network using the given parameters.

  Args:
    inputs: a Tensor of floats containing the input data.
    network_parameters: NetworkParameters object
      that describes the parameters for the network.
  Returns:
    output, projection, training_parameters: where the output (a tensor) is
    the output of the network, projection is the non-trainable input
    projection variable (or None), and training_parameters (a dictionary
    that maps the name of each variable to a dictionary of parameters) is
    the parameters used during training.
  """
  training_parameters = {}
  num_inputs = network_parameters.input_size
  outputs = inputs
  projection = None

  # First apply convolutions, if needed
  for conv_param in network_parameters.conv_parameters:
    outputs = tf.reshape(
        outputs,
        [-1, conv_param.in_size, conv_param.in_size,
         conv_param.in_channels])
    conv_weights_name = "%s_conv_weight" % (conv_param.name)
    conv_bias_name = "%s_conv_bias" % (conv_param.name)
    conv_std_dev = 1.0 / (conv_param.patch_size
                          * math.sqrt(conv_param.in_channels))
    conv_weights = tf.Variable(
        tf.truncated_normal([conv_param.patch_size,
                             conv_param.patch_size,
                             conv_param.in_channels,
                             conv_param.out_channels],
                            stddev=conv_std_dev),
        trainable=conv_param.trainable,
        name=conv_weights_name)
    conv_bias = tf.Variable(
        tf.truncated_normal([conv_param.out_channels],
                            stddev=conv_param.bias_stddev),
        trainable=conv_param.trainable,
        name=conv_bias_name)
    training_parameters[conv_weights_name] = {}
    training_parameters[conv_bias_name] = {}
    conv = tf.nn.conv2d(outputs, conv_weights,
                        strides=[1, conv_param.stride,
                                 conv_param.stride, 1],
                        padding="SAME")
    relud = tf.nn.relu(conv + conv_bias)
    mpd = tf.nn.max_pool(relud, ksize=[1,
                                       conv_param.max_pool_size,
                                       conv_param.max_pool_size, 1],
                         strides=[1, conv_param.max_pool_stride,
                                  conv_param.max_pool_stride, 1],
                         padding="SAME")
    outputs = mpd
    num_inputs = conv_param.num_outputs
    # this should equal
    # in_size * in_size * out_channels / (stride * max_pool_stride)

  # once all the convs are done, reshape to make it flat
  outputs = tf.reshape(outputs, [-1, num_inputs])

  # Now project, if needed.
  # Bug fix: was `is not "NONE"` -- identity comparison against a string
  # literal is implementation-dependent (and a SyntaxWarning on Py3.8+);
  # use equality instead.
  if network_parameters.projection_type != "NONE":
    projection = tf.Variable(tf.truncated_normal(
        [num_inputs, network_parameters.projection_dimensions],
        stddev=1.0 / math.sqrt(num_inputs)), trainable=False, name="projection")
    num_inputs = network_parameters.projection_dimensions
    outputs = tf.matmul(outputs, projection)

  # Now apply any other layers
  for layer_parameters in network_parameters.layer_parameters:
    num_units = layer_parameters.num_units
    hidden_weights_name = "%s_weight" % (layer_parameters.name)
    hidden_weights = tf.Variable(
        tf.truncated_normal([num_inputs, num_units],
                            stddev=1.0 / math.sqrt(num_inputs)),
        name=hidden_weights_name, trainable=layer_parameters.trainable)
    training_parameters[hidden_weights_name] = {}
    if layer_parameters.gradient_l2norm_bound:
      training_parameters[hidden_weights_name]["gradient_l2norm_bound"] = (
          layer_parameters.gradient_l2norm_bound)
    if layer_parameters.weight_decay:
      training_parameters[hidden_weights_name]["weight_decay"] = (
          layer_parameters.weight_decay)
    outputs = tf.matmul(outputs, hidden_weights)
    if layer_parameters.with_bias:
      hidden_biases_name = "%s_bias" % (layer_parameters.name)
      hidden_biases = tf.Variable(tf.zeros([num_units]),
                                  name=hidden_biases_name)
      training_parameters[hidden_biases_name] = {}
      if layer_parameters.bias_gradient_l2norm_bound:
        training_parameters[hidden_biases_name][
            "bias_gradient_l2norm_bound"] = (
                layer_parameters.bias_gradient_l2norm_bound)
      outputs += hidden_biases
    if layer_parameters.relu:
      outputs = tf.nn.relu(outputs)
    # num_inputs for the next layer is num_units in the current layer.
    num_inputs = num_units

  return outputs, projection, training_parameters
def VaryRate(start, end, saturate_epochs, epoch):
  """Compute a linearly varying number.

  Decrease linearly from start to end until epoch saturate_epochs.

  Args:
    start: the initial number.
    end: the end number.
    saturate_epochs: after this we do not reduce the number; if less than
      or equal to zero, just return start.
    epoch: the current learning epoch.
  Returns:
    the caculated number.
  """
  if saturate_epochs <= 0:
    return start
  # Per-epoch decrement (computed before the branch, matching the original
  # behaviour for saturate_epochs == 1).
  per_epoch_step = (start - end) / (saturate_epochs - 1)
  if epoch >= saturate_epochs:
    return end
  return start - per_epoch_step * epoch
def BatchClipByL2norm(t, upper_bound, name=None):
  """Clip an array of tensors by L2 norm.

  Shrink each dimension-0 slice of tensor (for matrix it is each row) such
  that the l2 norm is at most upper_bound. Here we clip each row as it
  corresponds to each example in the batch.

  Args:
    t: the input tensor.
    upper_bound: the upperbound of the L2 norm.
    name: optional name.
  Returns:
    the clipped tensor.
  """
  assert upper_bound > 0
  with tf.name_scope(values=[t, upper_bound], name=name,
                     default_name="batch_clip_by_l2norm") as name:
    saved_shape = tf.shape(t)
    batch_size = tf.slice(saved_shape, [0], [1])
    # Flatten each example to a row vector so rows can be scaled uniformly.
    t2 = tf.reshape(t, tf.concat(axis=0, values=[batch_size, [-1]]))
    upper_bound_inv = tf.fill(tf.slice(saved_shape, [0], [1]),
                              tf.constant(1.0/upper_bound))
    # Add a small number to avoid divide by 0
    l2norm_inv = tf.rsqrt(tf.reduce_sum(t2 * t2, [1]) + 0.000001)
    # Effective per-row scale is min(1, upper_bound/||row||): rows already
    # within the bound are (almost) unchanged, longer rows are shrunk to it.
    scale = tf.minimum(l2norm_inv, upper_bound_inv) * upper_bound
    clipped_t = tf.matmul(tf.diag(scale), t2)
    clipped_t = tf.reshape(clipped_t, saved_shape, name=name)
  return clipped_t
def SoftThreshold(t, threshold_ratio, name=None):
  """Soft-threshold a tensor by the mean value.

  Softthreshold each dimension-0 vector (for matrix it is each column) by
  the mean of absolute value multiplied by the threshold_ratio factor. Here
  we soft threshold each column as it corresponds to each unit in a layer.

  Args:
    t: the input tensor.
    threshold_ratio: the threshold ratio.
    name: the optional name for the returned tensor.
  Returns:
    the thresholded tensor, where each entry is soft-thresholded by
    threshold_ratio times the mean of the aboslute value of each column.
  """
  assert threshold_ratio >= 0
  with tf.name_scope(values=[t, threshold_ratio], name=name,
                     default_name="soft_thresholding") as name:
    saved_shape = tf.shape(t)
    # Flatten to (batch, -1) so thresholds are computed per column.
    # NOTE(review): unlike BatchClipByL2norm above, the trailing -1 here is a
    # bare scalar rather than the list [-1] -- confirm tf.concat accepts it.
    t2 = tf.reshape(t, tf.concat(axis=0, values=[tf.slice(saved_shape, [0], [1]), -1]))
    t_abs = tf.abs(t2)
    # Classic soft-thresholding: shrink every entry toward zero by the
    # per-column threshold; entries below the threshold become zero.
    t_x = tf.sign(t2) * tf.nn.relu(t_abs -
                                   (tf.reduce_mean(t_abs, [0],
                                                   keep_dims=True) *
                                    threshold_ratio))
    return tf.reshape(t_x, saved_shape, name=name)
def AddGaussianNoise(t, sigma, name=None):
  """Add i.i.d. Gaussian noise (0, sigma^2) to every entry of t.

  Args:
    t: the input tensor.
    sigma: the stddev of the Gaussian noise.
    name: optional name.
  Returns:
    the noisy tensor.
  """
  with tf.name_scope(values=[t, sigma], name=name,
                     default_name="add_gaussian_noise") as name:
    # Fresh noise is sampled each time the returned tensor is evaluated.
    noisy_t = t + tf.random_normal(tf.shape(t), stddev=sigma)
  return noisy_t
def GenerateBinomialTable(m):
  """Generate binomial table.

  Args:
    m: the size of the table.
  Returns:
    A two dimensional array T where T[i][j] = (i choose j),
    for 0<= i, j <=m.
  """
  table = numpy.zeros((m + 1, m + 1), dtype=numpy.float64)
  # Base case: C(i, 0) = 1 for all i.
  for i in range(m + 1):
    table[i, 0] = 1
  for i in range(1, m + 1):
    for j in range(1, m + 1):
      # Pascal's rule: C(i, j) = C(i-1, j) + C(i-1, j-1).
      v = table[i - 1, j] + table[i - 1, j -1]
      # float64 overflows to inf for large m; fail loudly rather than
      # silently propagate inf/nan.
      assert not math.isnan(v) and not math.isinf(v)
      table[i, j] = v
  return tf.convert_to_tensor(table)
================================================
FILE: differential_privacy/privacy_accountant/tf/accountant.py
================================================
# (originally from https://github.com/tensorflow/models/tree/master/research/differential_privacy,
# possibly with some small edits by @corcra)
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Defines Accountant class for keeping track of privacy spending.
A privacy accountant keeps track of privacy spendings. It has methods
accumulate_privacy_spending and get_privacy_spent. Here we only define
AmortizedAccountant which tracks the privacy spending in the amortized
way. It uses privacy amplication via sampling to compute the privacy
spending for each batch and strong composition (specialized for Gaussian
noise) for accumulate the privacy spending.
"""
from __future__ import division
import abc
import collections
import math
import sys
import numpy
import tensorflow as tf
from differential_privacy.dp_sgd.dp_optimizer import utils
EpsDelta = collections.namedtuple("EpsDelta", ["spent_eps", "spent_delta"])
import pdb
# TODO(liqzhang) To ensure the same API for AmortizedAccountant and
# MomentsAccountant, we pass the union of arguments to both, so we
# have unused_sigma for AmortizedAccountant and unused_eps_delta for
# MomentsAccountant. Consider to revise the API to avoid the unused
# arguments. It would be good to use @abc.abstractmethod, etc, to
# define the common interface as a base class.
class AmortizedAccountant(object):
  """Keep track of privacy spending in an amortized way.

  AmortizedAccountant accumulates the privacy spending by assuming
  all the examples are processed uniformly at random so the spending is
  amortized among all the examples. And we assume that we use Gaussian noise
  so the accumulation is on eps^2 and delta, using advanced composition.
  """

  def __init__(self, total_examples):
    """Initialization. Currently only support amortized tracking.

    Args:
      total_examples: total number of examples.
    """
    assert total_examples > 0
    self._total_examples = total_examples
    # Running totals are kept as graph variables so they persist across
    # training steps within a session (initialize with the other variables).
    self._eps_squared_sum = tf.Variable(tf.zeros([1]), trainable=False,
                                        name="eps_squared_sum")
    self._delta_sum = tf.Variable(tf.zeros([1]), trainable=False,
                                  name="delta_sum")

  def accumulate_privacy_spending(self, eps_delta, unused_sigma,
                                  num_examples):
    """Accumulate the privacy spending.

    Currently only support approximate privacy. Here we assume we use Gaussian
    noise on randomly sampled batch so we get better composition: 1. the per
    batch privacy is computed using privacy amplication via sampling bound;
    2. the composition is done using the composition with Gaussian noise.
    TODO(liqzhang) Add a link to a document that describes the bounds used.

    Args:
      eps_delta: EpsDelta pair which can be tensors.
      unused_sigma: the noise sigma. Unused for this accountant.
      num_examples: the number of examples involved.
    Returns:
      a TensorFlow operation for updating the privacy spending.
    """
    eps, delta = eps_delta
    with tf.control_dependencies(
        [tf.Assert(tf.greater(delta, 0),
                   ["delta needs to be greater than 0"])]):
      # Fraction of the dataset touched by this batch.
      amortize_ratio = (tf.cast(num_examples, tf.float32) * 1.0 /
                        self._total_examples)
      # Use privacy amplification via sampling bound.
      # See Lemma 2.2 in http://arxiv.org/pdf/1405.7085v2.pdf
      # TODO(liqzhang) Add a link to a document with formal statement
      # and proof.
      amortize_eps = tf.reshape(tf.log(1.0 + amortize_ratio * (
          tf.exp(eps) - 1.0)), [1])
      amortize_delta = tf.reshape(amortize_ratio * delta, [1])
      # Advanced composition for Gaussian noise: eps accumulates in
      # squares, delta accumulates linearly.
      return tf.group(*[tf.assign_add(self._eps_squared_sum,
                                      tf.square(amortize_eps)),
                        tf.assign_add(self._delta_sum, amortize_delta)])

  def get_privacy_spent(self, sess, target_eps=None):
    """Report the spending so far.

    Args:
      sess: the session to run the tensor.
      target_eps: the target epsilon. Unused.
    Returns:
      the list containing a single EpsDelta, with values as Python floats (as
      opposed to numpy.float64). This is to be consistent with
      MomentAccountant which can return a list of (eps, delta) pair.
    """
    # pylint: disable=unused-argument
    unused_target_eps = target_eps
    eps_squared_sum, delta_sum = sess.run([self._eps_squared_sum,
                                           self._delta_sum])
    # Total eps is the square root of the accumulated eps^2 (composition
    # of Gaussian mechanisms).
    return [EpsDelta(math.sqrt(eps_squared_sum), float(delta_sum))]
class MomentsAccountant(object):
"""Privacy accountant which keeps track of moments of privacy loss.
Note: The constructor of this class creates tf.Variables that must
be initialized with tf.global_variables_initializer() or similar calls.
MomentsAccountant accumulates the high moments of the privacy loss. It
requires a method for computing differenital moments of the noise (See
below for the definition). So every specific accountant should subclass
this class by implementing _differential_moments method.
Denote by X_i the random variable of privacy loss at the i-th step.
Consider two databases D, D' which differ by one item. X_i takes value
log Pr[M(D')==x]/Pr[M(D)==x] with probability Pr[M(D)==x].
In MomentsAccountant, we keep track of y_i(L) = log E[exp(L X_i)] for some
large enough L. To compute the final privacy spending, we apply Chernoff
bound (assuming the random noise added at each step is independent) to
bound the total privacy loss Z = sum X_i as follows:
Pr[Z > e] = Pr[exp(L Z) > exp(L e)]
< E[exp(L Z)] / exp(L e)
= Prod_i E[exp(L X_i)] / exp(L e)
= exp(sum_i log E[exp(L X_i)]) / exp(L e)
= exp(sum_i y_i(L) - L e)
Hence the mechanism is (e, d)-differentially private for
d = exp(sum_i y_i(L) - L e).
We require d < 1, i.e. e > sum_i y_i(L) / L. We maintain y_i(L) for several
L to compute the best d for any give e (normally should be the lowest L
such that 2 * sum_i y_i(L) / L < e.
We further assume that at each step, the mechanism operates on a random
sample with sampling probability q = batch_size / total_examples. Then
E[exp(L X)] = E[(Pr[M(D)==x / Pr[M(D')==x])^L]
By distinguishing two cases of whether D < D' or D' < D, we have
that
E[exp(L X)] <= max (I1, I2)
where
I1 = (1-q) E ((1-q) + q P(X+1) / P(X))^L + q E ((1-q) + q P(X) / P(X-1))^L
I2 = E (P(X) / ((1-q) + q P(X+1)))^L
In order to compute I1 and I2, one can consider to
1. use an asymptotic bound, which recovers the advance composition theorem;
2. use the closed formula (like GaussianMomentsAccountant);
3. use numerical integration or random sample estimation.
Dependent on the distribution, we can often obtain a tigher estimation on
the moments and hence a more accurate estimation of the privacy loss than
obtained using generic composition theorems.
"""
__metaclass__ = abc.ABCMeta
def __init__(self, total_examples, moment_orders=32):
"""Initialize a MomentsAccountant.
Args:
total_examples: total number of examples.
moment_orders: the order of moments to keep.
"""
assert total_examples > 0
self._total_examples = total_examples
self._moment_orders = (moment_orders
if isinstance(moment_orders, (list, tuple))
else range(1, moment_orders + 1))
self._max_moment_order = max(self._moment_orders)
assert self._max_moment_order < 100, "The moment order is too large."
self._log_moments = [tf.Variable(numpy.float64(0.0),
trainable=False,
name=("log_moments-%d" % moment_order))
for moment_order in self._moment_orders]
@abc.abstractmethod
def _compute_log_moment(self, sigma, q, moment_order):
    """Compute a high moment of the privacy loss (abstract).

    Subclasses implement the noise-distribution-specific computation
    (e.g. GaussianMomentsAccountant uses a closed-form expression).

    Args:
      sigma: the noise sigma, in the multiples of the sensitivity.
      q: the sampling ratio.
      moment_order: the order of moment.

    Returns:
      log E[exp(moment_order * X)], where X is the privacy loss variable.
    """
    pass
def accumulate_privacy_spending(self, unused_eps_delta,
                                sigma, num_examples):
    """Accumulate privacy spending.

    Accounts for privacy spending when we assume there are num_examples,
    and we are releasing the vector
    (sum_{i=1}^{num_examples} x_i) + Normal(0, stddev=l2norm_bound*sigma)
    where l2norm_bound is the maximum l2_norm of each example x_i, and the
    num_examples have been randomly selected out of a pool of
    self.total_examples.

    Args:
      unused_eps_delta: EpsDelta pair which can be tensors. Unused here.
      sigma: the noise sigma, in the multiples of the sensitivity (that is,
        if the l2norm sensitivity is k, then the caller must have added
        Gaussian noise with stddev=k*sigma to the result of the query).
      num_examples: the number of examples involved.

    Returns:
      a TensorFlow operation for updating the privacy spending.
    """
    # Sampling probability for this step.
    q = tf.cast(num_examples, tf.float64) * 1.0 / self._total_examples
    update_ops = []
    for order, accumulator in zip(self._moment_orders, self._log_moments):
        log_moment = self._compute_log_moment(sigma, q, order)
        update_ops.append(tf.assign_add(accumulator, log_moment))
    return tf.group(*update_ops)
def _compute_delta(self, log_moments, eps):
"""Compute delta for given log_moments and eps.
Args:
log_moments: the log moments of privacy loss, in the form of pairs
of (moment_order, log_moment)
eps: the target epsilon.
Returns:
delta
"""
min_delta = 1.0
for moment_order, log_moment in log_moments:
if math.isinf(log_moment) or math.isnan(log_moment):
sys.stderr.write("The %d-th order is inf or Nan\n" % moment_order)
continue
if log_moment < moment_order * eps:
min_delta = min(min_delta,
math.exp(log_moment - moment_order * eps))
return min_delta
def _compute_eps(self, log_moments, delta):
min_eps = float("inf")
for moment_order, log_moment in log_moments:
if math.isinf(log_moment) or math.isnan(log_moment):
sys.stderr.write("The %d-th order is inf or Nan\n" % moment_order)
continue
min_eps = min(min_eps, (log_moment - math.log(delta)) / moment_order)
return min_eps
def get_privacy_spent(self, sess, target_eps=None, target_deltas=None):
    """Compute privacy spending in (e, d)-DP form.

    Exactly one of target_eps / target_deltas must be given.

    Args:
      sess: the session used to read the accumulated log moments.
      target_eps: list of epsilons for which to compute the best delta.
      target_deltas: list of deltas for which to compute the best eps.

    Returns:
      A list of EpsDelta pairs, one per requested target.
    """
    assert (target_eps is None) ^ (target_deltas is None)
    log_moments = sess.run(self._log_moments)
    moments_with_order = numpy.array(list(zip(self._moment_orders, log_moments)))
    eps_deltas = []
    if target_eps is not None:
        for eps in target_eps:
            best_delta = self._compute_delta(moments_with_order, eps)
            eps_deltas.append(EpsDelta(eps, best_delta))
    else:
        assert target_deltas
        for delta in target_deltas:
            best_eps = self._compute_eps(moments_with_order, delta)
            eps_deltas.append(EpsDelta(best_eps, delta))
    return eps_deltas
class GaussianMomentsAccountant(MomentsAccountant):
    """MomentsAccountant which assumes Gaussian noise.

    GaussianMomentsAccountant assumes the noise added is centered Gaussian
    noise N(0, sigma^2 I). In this case, we can compute the differential
    moments accurately using a formula.

    For asymptotic bound, for Gaussian noise with variance sigma^2, we can
    show for L < sigma^2, q L < sigma,
      log E[exp(L X)] = O(q^2 L^2 / sigma^2).
    Using this we derive that for training T epoches, with batch ratio q,
    the Gaussian mechanism with variance sigma^2 (with q < 1/sigma) is (e, d)
    private for d = exp(T/q q^2 L^2 / sigma^2 - L e). Setting L = sigma^2,
    Tq = e/2, the mechanism is (e, exp(-e sigma^2/2))-DP. Equivalently, the
    mechanism is (e, d)-DP if sigma = sqrt{2 log(1/d)}/e, q < 1/sigma,
    and T < e/(2q). This bound is better than the bound obtained using
    general composition theorems, by an Omega(sqrt{log k}) factor on epsilon,
    if we run k steps.

    For GaussianMomentAccountant, it suffices to compute I1, as I1 >= I2,
    which reduces to computing E(P(x+s)/P(x+s-1) - 1)^i for s = 0 and 1.
    The companion gaussian_moments.py file supplies procedures for computing
    both I1 and I2; users should independently verify I1 >= I2 for their
    parameters.
    """

    def __init__(self, total_examples, moment_orders=32):
        """Initialization.

        Args:
          total_examples: total number of examples.
          moment_orders: the order of moments to keep.
        """
        # BUG FIX: the original used super(self.__class__, self), which
        # recurses infinitely if this class is ever subclassed; name the
        # class explicitly (Python 2/3 compatible form).
        super(GaussianMomentsAccountant, self).__init__(total_examples,
                                                        moment_orders)
        # Precompute binomial coefficients up to the largest tracked order.
        self._binomial_table = utils.GenerateBinomialTable(self._max_moment_order)

    def _differential_moments(self, sigma, s, t):
        """Compute 0 to t-th differential moments for Gaussian variable.

        E[(P(x+s)/P(x+s-1)-1)^t]
          = sum_{i=0}^t (t choose i) (-1)^{t-i} E[(P(x+s)/P(x+s-1))^i]
          = sum_{i=0}^t (t choose i) (-1)^{t-i} E[exp(-i*(2*x+2*s-1)/(2*sigma^2))]
          = sum_{i=0}^t (t choose i) (-1)^{t-i} exp(i(i+1-2*s)/(2 sigma^2))

        Args:
          sigma: the noise sigma, in the multiples of the sensitivity.
          s: the shift.
          t: 0 to t-th moment.

        Returns:
          0 to t-th moment as a tensor of shape [t+1].
        """
        assert t <= self._max_moment_order, ("The order of %d is out "
                                             "of the upper bound %d."
                                             % (t, self._max_moment_order))
        binomial = tf.slice(self._binomial_table, [0, 0],
                            [t + 1, t + 1])
        # signs[i, j] = (-1)^{i-j}
        signs = numpy.zeros((t + 1, t + 1), dtype=numpy.float64)
        for i in range(t + 1):
            for j in range(t + 1):
                signs[i, j] = 1.0 - 2 * ((i - j) % 2)
        exponents = tf.constant([j * (j + 1.0 - 2.0 * s) / (2.0 * sigma * sigma)
                                 for j in range(t + 1)], dtype=tf.float64)
        # x[i, j] = binomial[i, j] * signs[i, j] = (i choose j) * (-1)^{i-j}
        x = tf.multiply(binomial, signs)
        # y[i, j] = x[i, j] * exp(exponents[j])
        #         = (i choose j) * (-1)^{i-j} * exp(j(j-1)/(2 sigma^2))
        # Note: this computation is done by broadcasting pointwise
        # multiplication between a [t+1, t+1] tensor and a [t+1] tensor.
        y = tf.multiply(x, tf.exp(exponents))
        # z[i] = sum_j y[i, j]
        #      = sum_j (i choose j) * (-1)^{i-j} * exp(j(j-1)/(2 sigma^2))
        z = tf.reduce_sum(y, 1)
        return z

    def _compute_log_moment(self, sigma, q, moment_order):
        """Compute high moment of privacy loss.

        Args:
          sigma: the noise sigma, in the multiples of the sensitivity.
          q: the sampling ratio.
          moment_order: the order of moment.

        Returns:
          log E[exp(moment_order * X)]
        """
        assert moment_order <= self._max_moment_order, ("The order of %d is out "
                                                        "of the upper bound %d."
                                                        % (moment_order,
                                                           self._max_moment_order))
        binomial_table = tf.slice(self._binomial_table, [moment_order, 0],
                                  [1, moment_order + 1])
        # qs = [1 q q^2 ... q^L] = exp([0 1 2 ... L] * log(q))
        qs = tf.exp(tf.constant([i * 1.0 for i in range(moment_order + 1)],
                                dtype=tf.float64) * tf.cast(
                                    tf.log(q), dtype=tf.float64))
        moments0 = self._differential_moments(sigma, 0.0, moment_order)
        term0 = tf.reduce_sum(binomial_table * qs * moments0)
        moments1 = self._differential_moments(sigma, 1.0, moment_order)
        term1 = tf.reduce_sum(binomial_table * qs * moments1)
        # I1-style bound: mixture of the shifted and unshifted moments.
        return tf.squeeze(tf.log(tf.cast(q * term0 + (1.0 - q) * term1,
                                         tf.float64)))
class DummyAccountant(object):
    """An accountant that performs no accounting at all."""

    def accumulate_privacy_spending(self, *unused_args):
        """Nothing to record; return a no-op for graph compatibility."""
        return tf.no_op()

    def get_privacy_spent(self, unused_sess, **unused_kwargs):
        """Report the trivial guarantee: infinite eps, delta of 1."""
        return [EpsDelta(numpy.inf, 1.0)]
================================================
FILE: eugenium_mmd.py
================================================
'''
Code taken from: https://github.com/eugenium/mmd
(modified slightly for efficiency/PEP by Stephanie Hyland)
Python implementation of MMD and Covariance estimates for Relative MMD
Some code is based on code from Vincent Van Asch
which is based on matlab code from Arthur Gretton
Eugene Belilovsky
eugene.belilovsky@inria.fr
'''
import numpy as np
import scipy as sp
from numpy import sqrt
from sklearn.metrics.pairwise import rbf_kernel
from functools import partial
import pdb
def my_kernel(X, Y, sigma):
    """RBF kernel matrix between the samples of X and Y with bandwidth sigma.

    Supports 2-D inputs (samples, features) and 3-D inputs
    (samples, time, features); entry (i, j) is
    exp(-||X_i - Y_j||^2 / (2 sigma^2)) with the norm taken over all
    non-sample axes.
    """
    gamma = 1 / (2 * sigma**2)
    rank = len(X.shape)
    if rank == 2:
        X_sqnorms = np.einsum('...i,...i', X, X)
        Y_sqnorms = np.einsum('...i,...i', Y, Y)
        XY = np.einsum('ia,ja', X, Y)
    elif rank == 3:
        X_sqnorms = np.einsum('...ij,...ij', X, X)
        Y_sqnorms = np.einsum('...ij,...ij', Y, Y)
        XY = np.einsum('iab,jab', X, Y)
    else:
        # unsupported rank: drop into the debugger, as in the original code
        pdb.set_trace()
    sq_dists = X_sqnorms.reshape(-1, 1) - 2 * XY + Y_sqnorms.reshape(1, -1)
    return np.exp(-gamma * sq_dists)
def MMD_3_Sample_Test(X, Y, Z, sigma=-1, SelectSigma=True, computeMMDs=False):
    '''Performs the relative MMD test which returns a test statistic for whether Y is closer to X or than Z.

    See http://arxiv.org/pdf/1511.04581.pdf
    The bandwith heuristic is based on the median heuristic (see Smola, Gretton).

    Args:
        X, Y, Z: sample arrays; the first axis indexes samples.
        sigma: RBF bandwidth; if negative, chosen by a median heuristic.
        SelectSigma: if True, average the pairwise heuristics of (X, Y) and
            (X, Z); otherwise pool a third of each set and use kernelwidth.
        computeMMDs: if True, also return the unbiased MMD^2 estimates.

    Returns:
        (pvalue, tstat, sigma, MMDXY, MMDXZ); MMDXY/MMDXZ are None unless
        computeMMDs is True.
    '''
    if sigma < 0:
        # Bandwidth via median heuristics.
        if SelectSigma:
            siz = np.min((1000, X.shape[0]))
            sigma1 = kernelwidthPair(X[0:siz], Y[0:siz])
            sigma2 = kernelwidthPair(X[0:siz], Z[0:siz])
            sigma = (sigma1 + sigma2) / 2.
        else:
            siz = np.min((1000, X.shape[0] * 3))
            # BUG FIX: under Python 3, `siz / 3` is a float and is not a
            # valid slice index (TypeError); use integer division instead.
            third = siz // 3
            Zem = np.r_[X[0:third], Y[0:third], Z[0:third]]
            sigma = kernelwidth(Zem)
    kernel = partial(my_kernel, sigma=sigma)
    Kyy = kernel(Y, Y)
    Kzz = kernel(Z, Z)
    Kxy = kernel(X, Y)
    Kxz = kernel(X, Z)
    # Remove diagonals for the unbiased within-set statistics.
    Kyynd = Kyy - np.diag(np.diagonal(Kyy))
    Kzznd = Kzz - np.diag(np.diagonal(Kzz))
    m = Kxy.shape[0]
    n = Kyy.shape[0]
    r = Kzz.shape[0]
    u_yy = np.sum(Kyynd) * (1. / (n * (n - 1)))
    u_zz = np.sum(Kzznd) * (1. / (r * (r - 1)))
    u_xy = np.sum(Kxy) / (m * n)
    u_xz = np.sum(Kxz) / (m * r)
    # Compute the test statistic: difference of the two (partial) MMDs.
    t = u_yy - 2. * u_xy - (u_zz - 2. * u_xz)
    Diff_Var, Diff_Var_z2, data = MMD_Diff_Var(Kyy, Kzz, Kxy, Kxz)
    # One-sided p-value under the asymptotic normal approximation.
    pvalue = sp.stats.norm.cdf(-t / np.sqrt(Diff_Var))
    tstat = t / sqrt(Diff_Var)
    if computeMMDs:
        Kxx = kernel(X, X)
        Kxxnd = Kxx - np.diag(np.diagonal(Kxx))
        u_xx = np.sum(Kxxnd) * (1. / (m * (m - 1)))
        MMDXY = u_xx + u_yy - 2. * u_xy
        MMDXZ = u_xx + u_zz - 2. * u_xz
    else:
        MMDXY = None
        MMDXZ = None
    return pvalue, tstat, sigma, MMDXY, MMDXZ
def MMD_Diff_Var(Kyy, Kzz, Kxy, Kxz):
'''
Compute the variance of the difference statistic MMDXY-MMDXZ
See http://arxiv.org/pdf/1511.04581.pdf Appendix for derivations
'''
m = Kxy.shape[0];
n = Kyy.shape[0];
r = Kzz.shape[0];
Kyynd = Kyy-np.diag(np.diagonal(Kyy));
Kzznd = Kzz-np.diag(np.diagonal(Kzz));
u_yy=np.sum(Kyynd)*( 1./(n*(n-1)) );
u_zz=np.sum(Kzznd)*( 1./(r*(r-1)) );
u_xy=np.sum(Kxy)/(m*n);
u_xz=np.sum(Kxz)/(m*r);
#compute zeta1
t1=(1./n**3)*np.sum(Kyynd.T.dot(Kyynd))-u_yy**2;
t2=(1./(n**2*m))*np.sum(Kxy.T.dot(Kxy))-u_xy**2;
t3=(1./(n*m**2))*np.sum(Kxy.dot(Kxy.T))-u_xy**2;
t4=(1./r**3)*np.sum(Kzznd.T.dot(Kzznd))-u_zz**2;
t5=(1./(r*m**2))*np.sum(Kxz.dot(Kxz.T))-u_xz**2;
t6=(1./(r**2*m))*np.sum(Kxz.T.dot(Kxz))-u_xz**2;
t7=(1./(n**2*m))*np.sum(Kyynd.dot(Kxy.T))-u_yy*u_xy;
t8=(1./(n*m*r))*np.sum(Kxy.T.dot(Kxz))-u_xz*u_xy;
t9=(1./(r**2*m))*np.sum(Kzznd.dot(Kxz.T))-u_zz*u_xz;
zeta1=(t1+t2+t3+t4+t5+t6-2.*(t7+t8+t9));
zeta2=(1/m/(m-1))*np.sum((Kyynd-Kzznd-Kxy.T-Kxy+Kxz+Kxz.T)**2)-(u_yy - 2.*u_xy - (u_zz-2.*u_xz))**2;
data=dict({'t1':t1,
't2':t2,
't3':t3,
't4':t4,
't5':t5,
't6':t6,
't7':t7,
't8':t8,
't9':t9,
'zeta1':zeta1,
'zeta2':zeta2,
})
#TODO more precise version for zeta2
# xx=(1/m^2)*sum(sum(Kxxnd.*Kxxnd))-u_xx^2;
# yy=(1/n^2)*sum(sum(Kyynd.*Kyynd))-u_yy^2;
#xy=(1/(n*m))*sum(sum(Kxy.*Kxy))-u_xy^2;
#xxy=(1/(n*m^2))*sum(sum(Kxxnd*Kxy))-u_xx*u_xy;
#yyx=(1/(n^2*m))*sum(sum(Kyynd*Kxy'))-u_yy*u_xy;
#zeta2=(xx+yy+xy+xy-2*(xxy+xxy +yyx+yyx))
Var=(4.*(m-2)/(m*(m-1)))*zeta1;
Var_z2=Var+(2./(m*(m-1)))*zeta2;
return Var, Var_z2, data
def grbf(x1, x2, sigma):
    '''Calculates the Gaussian radial base function kernel'''
    n, nfeatures = x1.shape
    m, mfeatures = x2.shape
    # row-wise squared norms, shaped for broadcasting
    sq1 = np.sum(x1 * x1, 1)[:, np.newaxis]
    sq2 = np.sum(x2 * x2, 1)[np.newaxis, :]
    # squared Euclidean distances: ||a||^2 + ||b||^2 - 2 a.b
    h = sq1 + sq2
    h = h - 2 * np.dot(x1, x2.transpose())
    h = np.array(h, dtype=float)
    return np.exp(-1. * h / (2. * pow(sigma, 2)))
def kernelwidthPair(x1, x2):
    '''Implementation of the median heuristic. See Gretton 2012

    Pick sigma such that the exponent of exp(- ||x-y|| / (2*sigma2)),
    in other words ||x-y|| / (2*sigma2), equals 1 for the median distance x
    and y of all distances between points from both data sets X and Y.
    '''
    n, nfeatures = x1.shape
    m, mfeatures = x2.shape
    # row-wise squared norms, shaped for broadcasting
    sq1 = np.sum(x1 * x1, 1)[:, np.newaxis]
    sq2 = np.sum(x2 * x2, 1)[np.newaxis, :]
    # squared Euclidean distance matrix
    h = sq1 + sq2
    h = h - 2 * np.dot(x1, x2.transpose())
    h = np.array(h, dtype=float)
    # median of the non-zero squared distances
    mdist = np.median([i for i in h.flat if i])
    sigma = sqrt(mdist / 2.0)
    if not sigma:
        sigma = 1
    return sigma
def kernelwidth(Zmed):
    '''Alternative median heuristic when we cant partition the points
    '''
    # row-wise squared norms of the pooled sample
    sq = np.sum(Zmed * Zmed, axis=1)
    # pairwise squared distances: ||a||^2 + ||b||^2 - 2 a.b
    h = sq[:, np.newaxis] + sq[np.newaxis, :]
    h = h - 2. * Zmed.dot(Zmed.T)
    h = np.array(h, dtype=float)
    # median of the non-zero squared distances
    mdist = np.median([i for i in h.flat if i])
    sigma = sqrt(mdist / 2.0)
    if not sigma:
        sigma = 1
    return sigma
def MMD_unbiased(Kxx, Kyy, Kxy):
    """Unbiased MMD^2 estimate when the x- and y-distributions differ."""
    m = Kxx.shape[0]
    n = Kyy.shape[0]
    # within-set means exclude the diagonal; cross term uses all entries
    within_x = (1. / (m * (m - 1))) * np.sum(Kxx - np.diag(np.diagonal(Kxx)))
    cross = (2. / (m * n)) * np.sum(Kxy)
    within_y = (1. / (n * (n - 1))) * np.sum(Kyy - np.diag(np.diagonal(Kyy)))
    return within_x - cross + within_y
================================================
FILE: eval.py
================================================
#!/usr/bin/env ipython
# Evaluation of models
#
import json
import pdb
import numpy as np
import pandas as pd
from eugenium_mmd import MMD_3_Sample_Test
from scipy.stats import ks_2samp
import mmd
from sklearn.svm import SVC
from sklearn.metrics import classification_report, precision_recall_fscore_support, accuracy_score, roc_auc_score, average_precision_score
from sklearn.ensemble import RandomForestClassifier
import sklearn
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# for keras
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.backend import clear_session
import model
import data_utils
import plotting
import pickle
def assert_same_data(A, B):
    """Verify experiments A and B used the same dataset; return its path.

    Each settings dict either loaded its data ('data' == 'load', with
    'data_load_from' naming the source) or generated it under its own
    'identifier'. Raises ValueError when neither experiment loaded data.
    """
    A_loads = A['data'] == 'load'
    B_loads = B['data'] == 'load'
    if A_loads and B_loads:
        # both loaded: they must point at the same file
        assert A['data_load_from'] == B['data_load_from']
        source = A['data_load_from']
    elif A_loads and not B_loads:
        # A loaded from the data B generated
        assert A['data_load_from'] == B['identifier']
        source = A['data_load_from']
    elif B_loads and not A_loads:
        # B loaded from the data A generated
        assert B['data_load_from'] == A['identifier']
        source = A['identifier']
    else:
        raise ValueError(A['data'], B['data'])
    return './experiments/data/' + source
def model_memorisation(identifier, epoch, max_samples=2000, tstr=False):
    """
    Compare samples from a model against training set and validation set in mmd

    Runs the relative MMD three-sample test with X = model samples,
    Y = test set, Z = a random subset of the training set, using a single
    median-heuristic bandwidth computed over all pooled samples.

    Args:
        identifier: experiment identifier (settings/data file stem), or one
            of the two hard-coded 'cristobal_*' special cases.
        epoch: training epoch whose model/samples to evaluate.
        max_samples: cap on the number of test/model samples compared.
        tstr: if True, load pre-generated samples from a TSTR experiment
            instead of sampling from the model.

    Returns:
        (pvalue, tstat, sigma) from MMD_3_Sample_Test.
    """
    if tstr:
        print('Loading data from TSTR experiment (not sampling from model)')
        # load pre-generated samples
        synth_data = np.load('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy').item()
        model_samples = synth_data['samples']
        synth_labels = synth_data['labels']
        # load real data used in that experiment
        real_data = np.load('./experiments/data/' + identifier + '.data.npy').item()
        real_samples = real_data['samples']
        train = real_samples['train']
        test = real_samples['test']
        n_samples = test.shape[0]
        if model_samples.shape[0] > n_samples:
            model_samples = np.random.permutation(model_samples)[:n_samples]
        print('Data loaded successfully!')
    else:
        if identifier == 'cristobal_eICU':
            # NOTE(review): path was redacted upstream; this branch cannot
            # run without the original pickle of model samples.
            model_samples = pickle.load(open('REDACTED', 'rb'))
            samples, labels = data_utils.eICU_task()
            # assumes flat eICU samples reshape to (batch, 16, 4) -- the
            # seq_length/num_signals used in that experiment
            train = samples['train'].reshape(-1,16,4)
            vali = samples['vali'].reshape(-1,16,4)
            test = samples['test'].reshape(-1,16,4)
            #train_targets = labels['train']
            #vali_targets = labels['vali']
            #test_targets = labels['test']
            train, vali, test = data_utils.scale_data(train, vali, test)
            n_samples = test.shape[0]
            if n_samples > max_samples:
                n_samples = max_samples
                test = np.random.permutation(test)[:n_samples]
            if model_samples.shape[0] > n_samples:
                model_samples = np.random.permutation(model_samples)[:n_samples]
        elif identifier == 'cristobal_MNIST':
            the_dir = 'REDACTED'
            # pick a random one
            which = np.random.choice(['NEW_OK_', '_r4', '_r5', '_r6', '_r7'])
            model_samples, model_labels = pickle.load(open(the_dir + 'synth_mnist_minist_cdgan_1_2_100_multivar_14_nolr_rdim3_0_2_' + which + '_190.pk', 'rb'))
            # get test and train...
            # (generated with fixed seed...)
            mnist_resized_dim = 14
            samples, labels = data_utils.load_resized_mnist(mnist_resized_dim)
            proportions = [0.6, 0.2, 0.2]
            train, vali, test, labels_split = data_utils.split(samples, labels=labels, random_seed=1, proportions=proportions)
            # re-randomise the RNG after the seeded split above
            np.random.seed()
            train = train.reshape(-1, 14, 14)
            test = test.reshape(-1, 14, 14)
            vali = vali.reshape(-1, 14, 14)
            n_samples = test.shape[0]
            if n_samples > max_samples:
                n_samples = max_samples
                test = np.random.permutation(test)[:n_samples]
            if model_samples.shape[0] > n_samples:
                model_samples = np.random.permutation(model_samples)[:n_samples]
        else:
            # standard case: sample fresh data from the trained model
            settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r'))
            # get the test, train sets
            data = np.load('./experiments/data/' + identifier + '.data.npy').item()
            train = data['samples']['train']
            test = data['samples']['test']
            n_samples = test.shape[0]
            if n_samples > max_samples:
                n_samples = max_samples
                test = np.random.permutation(test)[:n_samples]
            model_samples = model.sample_trained_model(settings, epoch, n_samples)
    # one shared bandwidth from the pooled samples
    all_samples = np.vstack([train, test, model_samples])
    heuristic_sigma = mmd.median_pairwise_distance(all_samples)
    print('heuristic sigma:', heuristic_sigma)
    pvalue, tstat, sigma, MMDXY, MMDXZ = MMD_3_Sample_Test(model_samples, test, np.random.permutation(train)[:n_samples], sigma=heuristic_sigma, computeMMDs=False)
    #pvalue, tstat, sigma, MMDXY, MMDXZ = MMD_3_Sample_Test(model_samples, np.random.permutation(train)[:n_samples], test, sigma=heuristic_sigma, computeMMDs=False)
    # if pvalue < 0.05:
    #     print('At confidence level 0.05, we reject the null hypothesis that MMDXY <= MMDXZ, and conclude that the test data has a smaller MMD with the true data than the generated data')
    # the function takes (X, Y, Z) as its first arguments, it's testing if MMDXY (i.e. MMD between model and train) is less than MMDXZ (MMd between model and test)
    # else:
    #     print('We have failed to reject the null hypothesis that MMDXY <= MMDXZ, and cannot conclu#de that the test data has a smaller MMD with the true data than the generated data')
    return pvalue, tstat, sigma
def model_comparison(identifier_A, identifier_B, epoch_A=99, epoch_B=99):
    """
    Compare two models using relative MMD test
    """
    settings_A = json.load(open('./experiments/settings/' + identifier_A + '.txt', 'r'))
    settings_B = json.load(open('./experiments/settings/' + identifier_B + '.txt', 'r'))
    # both experiments must refer to the same dataset
    data_path = assert_same_data(settings_A, settings_B)
    # compare against the validation split of that dataset
    vali_data = np.load(data_path + '.data.npy').item()['samples']['vali']
    n_samples = vali_data.shape[0]
    A_samples = model.sample_trained_model(settings_A, epoch_A, n_samples)
    B_samples = model.sample_trained_model(settings_B, epoch_B, n_samples)
    # TODO: support multiple signals (only the first signal is compared)
    # MMD_3_Sample_Test(X, Y, Z) tests the null hypothesis
    #   H0: MMD(F, Px, Py) <= MMD(F, Px, Pz)
    # against
    #   H1: MMD(F, Px, Py) > MMD(F, Px, Pz)
    # at whatever significance level we threshold the p-value with below.
    pvalue, tstat, sigma, MMDXY, MMDXZ = MMD_3_Sample_Test(vali_data[:, :, 0], A_samples[:, :, 0], B_samples[:, :, 0], computeMMDs=True)
    print(pvalue, tstat, sigma)
    if pvalue < 0.05:
        print('At confidence level 0.05, we reject the null hypothesis that MMDXY <= MMDXZ, and conclude that', identifier_B, 'has a smaller MMD with the true data than', identifier_A)
    else:
        print('We have failed to reject the null hypothesis that MMDXY <= MMDXZ, and cannot conclude that', identifier_B, 'has a smaller MMD with the true data than', identifier_A)
    return pvalue, tstat, sigma, MMDXY, MMDXZ
# --- to do with reconstruction --- #
def get_reconstruction_errors(identifier, epoch, g_tolerance=0.05, max_samples=1000, rerun=False, tstr=False):
    """
    Get the reconstruction error of every point in the training set of a given
    experiment.

    Computes (or loads precomputed) per-sample reconstruction errors for the
    train, test and generated sets, runs two-sample KS tests between the
    error distributions, and saves plots of the error distributions plus the
    easiest/hardest training samples.

    Args:
        identifier: experiment identifier (settings/data file stem).
        epoch: training epoch of the model to invert.
        g_tolerance: generator tolerance passed through to the inversion.
        max_samples: cap on the number of train/test samples inverted.
        rerun: if True, ignore any cached errors and recompute.
        tstr: if True, use the pre-generated TSTR samples (and labels)
            instead of sampling fresh data from the model.
    """
    settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r'))
    # the experiment may have loaded its data from another experiment
    if settings['data_load_from']:
        data_dict = np.load('./experiments/data/' + settings['data_load_from'] + '.data.npy').item()
    else:
        data_dict = np.load('./experiments/data/' + identifier + '.data.npy').item()
    samples = data_dict['samples']
    train = samples['train']
    vali = samples['vali']
    test = samples['test']
    labels = data_dict['labels']
    train_labels, test_labels, synth_labels, vali_labels = None, None, None, None
    try:
        if rerun:
            # force the except branch so everything is recomputed
            raise FileNotFoundError
        errors = np.load('./experiments/eval/' + identifier + '_' + str(epoch) + '_' + str(g_tolerance) + '.reconstruction_errors.npy').item()
        train_errors = errors['train']
        test_errors = errors['test']
        generated_errors = errors['generated']
        noisy_errors = errors['noisy']
        print('Loaded precomputed errors')
    except FileNotFoundError:
        if tstr:
            # use samples (and labels) from the TSTR experiment
            synth_data = np.load('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy').item()
            generated = synth_data['samples']
            synth_labels = synth_data['labels']
            train_labels = labels['train']
            test_labels = labels['test']
            vali_labels = labels['vali']
        else:
            # generate new data
            n_eval = 500
            # generate "easy" samples from the distribution
            generated = model.sample_trained_model(settings, epoch, n_eval)
            # generate "hard' random samples, not from train/test distribution
            # TODO: use original validation examples, add noise etc.
            ## random_samples = np.random.normal(size=generated.shape)
            # random_samples -= np.mean(random_samples, axis=0)
            # random_samples += np.mean(vali, axis=0)
            # random_samples /= np.std(random_samples, axis=0)
            # random_samples *= np.std(vali, axis=0)
        # get all the errors
        print('Getting reconstruction errors on train set')
        if train.shape[0] > max_samples:
            # subsample, keeping labels aligned with samples
            index_subset = np.random.permutation(train.shape[0])[:max_samples]
            train = train[index_subset]
            if train_labels is not None:
                train_labels = train_labels[index_subset]
        train_errors = error_per_sample(identifier, epoch, train, n_rep=5, g_tolerance=g_tolerance, C_samples=train_labels)
        print('Getting reconstruction errors on test set')
        if test.shape[0] > max_samples:
            index_subset = np.random.permutation(test.shape[0])[:max_samples]
            test = test[index_subset]
            if test_labels is not None:
                test_labels = test_labels[index_subset]
        test_errors = error_per_sample(identifier, epoch, test, n_rep=5, g_tolerance=g_tolerance, C_samples=test_labels)
        D_test, p_test = ks_2samp(train_errors, test_errors)
        print('KS statistic and p-value for train v. test erors:', D_test, p_test)
        # NOTE(review): deliberate debugger breakpoint left in by the
        # original author; execution pauses here when recomputing.
        pdb.set_trace()
        print('Getting reconstruction errors on generated set')
        generated_errors = error_per_sample(identifier, epoch, generated, n_rep=5, g_tolerance=g_tolerance, C_samples=synth_labels)
        D_gen, p_gen = ks_2samp(generated_errors, train_errors)
        print('KS statistic and p-value for train v. gen erors:', D_gen, p_gen)
        D_gentest, p_gentest = ks_2samp(generated_errors, test_errors)
        print('KS statistic and p-value for gen v. test erors:', D_gentest, p_gentest)
        # print('Getting reconstruction errors on noisy set')
        # alpha = 0.5
        # noisy_samples = alpha*vali + (1-alpha)*np.random.permutation(vali)
        # noisy_errors = error_per_sample(identifier, epoch, noisy_samples, n_rep=5, g_tolerance=g_tolerance, C_samples=vali_labels)
        noisy_errors = None
        # save!
        errors = {'train': train_errors, 'test': test_errors, 'generated': generated_errors, 'noisy': noisy_errors}
        np.save('./experiments/eval/' + identifier + '_' + str(epoch) + '_' + str(g_tolerance) + '.reconstruction_errors.npy', errors)
    # do two-sample Kolomogorov-Smirnov test for equality
    D_test, p_test = ks_2samp(train_errors, test_errors)
    print('KS statistic and p-value for train v. test erors:', D_test, p_test)
    D_gen, p_gen = ks_2samp(generated_errors, train_errors)
    print('KS statistic and p-value for train v. gen erors:', D_gen, p_gen)
    D_gentest, p_gentest = ks_2samp(generated_errors, test_errors)
    print('KS statistic and p-value for gen v. test erors:', D_gentest, p_gentest)
    # visualise distribution of errors for train and test
    plotting.reconstruction_errors(identifier + '_' + str(epoch) + '_' + str(g_tolerance), train_errors, test_errors, generated_errors, noisy_errors)
    # visualise the "hardest" and "easiest" samples from train
    ranking_train = np.argsort(train_errors)
    easiest_train = ranking_train[:6]
    hardest_train = ranking_train[-6:]
    plotting.save_plot_sample(train[easiest_train], epoch, identifier + '_easytrain', n_samples=6, num_epochs=None, ncol=2)
    plotting.save_plot_sample(train[hardest_train], epoch, identifier + '_hardtrain', n_samples=6, num_epochs=None, ncol=2)
    # visualise the "hardest" and "easiest" samples from random
    # ranking_random = np.argsort(noisy_errors)
    # easiest_random = ranking_random[:6]
    # hardest_random = ranking_random[-6:]
    # plotting.save_plot_sample(random_samples[easiest_random], epoch, identifier + '_easyrandom', n_samples=6, num_epochs=None, ncol=2)
    # plotting.save_plot_sample(random_samples[hardest_random], epoch, identifier + '_hardrandom', n_samples=6, num_epochs=None, ncol=2)
    return True
def error_per_sample(identifier, epoch, samples, n_rep=3, n_iter=None, g_tolerance=0.025, use_min=True, C_samples=None):
    """
    Get (average over a few runs) of the reconstruction error per sample
    """
    n_samples = samples.shape[0]
    # one shared median-heuristic bandwidth for every inversion attempt
    heuristic_sigma = np.float32(mmd.median_pairwise_distance(samples))
    errors = np.zeros(shape=(n_samples, n_rep))
    for rep in range(n_rep):
        _, rep_errors, _ = model.invert(identifier, epoch, samples, n_iter=n_iter, heuristic_sigma=heuristic_sigma, g_tolerance=g_tolerance, C_samples=C_samples)
        errors[:, rep] = rep_errors
    # reduce the repetitions: best-case (min) or average (mean) per sample
    reduce_fn = np.min if use_min else np.mean
    return reduce_fn(errors, axis=1)
# --- visualisation evaluation --- #
def view_digit(identifier, epoch, digit, n_samples=6):
    """
    Generate a bunch of MNIST digits from a CGAN, view them
    """
    settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r'))
    cond_dim = settings['cond_dim']
    if settings['one_hot']:
        # one-hot conditioning: the digit selects a column
        assert settings['max_val'] == 1
        assert digit <= cond_dim
        C_samples = np.zeros(shape=(n_samples, cond_dim))
        C_samples[:, digit] = 1
    else:
        # scalar conditioning: repeat the digit value for every sample
        assert cond_dim == 1
        assert digit <= settings['max_val']
        C_samples = np.array([digit] * n_samples).reshape(-1, 1)
    digit_samples = model.sample_trained_model(settings, epoch, n_samples, Z_samples=None,
                                               cond_dim=cond_dim, C_samples=C_samples)
    digit_samples = digit_samples.reshape(n_samples, -1, 1)
    # visualise
    plotting.save_mnist_plot_sample(digit_samples, digit, identifier + '_' + str(epoch) + '_digit_', n_samples)
    return True
def view_interpolation(identifier, epoch, n_steps=6, input_samples=None, e_tolerance=0.01, sigma=3.29286853021):
    """
    If samples: generate interpolation between real points
    Else:
    Sample two points in the latent space, view a linear interpolation between them.

    Inverts two samples into latent space, generates samples along a linear
    interpolation between the two latent codes, and plots them together with
    their RBF distances to the two endpoints.
    """
    settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r'))
    if input_samples is None:
        # grab two trainng examples
        data = np.load('./experiments/data/' + identifier + '.data.npy').item()
        train = data['samples']['train']
        input_samples = np.random.permutation(train)[:2]
        # Z_sampleA, Z_sampleB = model.sample_Z(2, settings['seq_length'], settings['latent_dim'],
        # settings['use_time'])
    if sigma is None:
        ## gotta get a sigma somehow
        # NOTE(review): `train` is only bound when input_samples was None;
        # calling with explicit input_samples AND sigma=None would raise
        # NameError here — confirm intended usage.
        sigma = mmd.median_pairwise_distance(train)
        print('Calcualted heuristic sigma from training data:', sigma)
    # invert the two samples to their latent representations
    Zs, error, _ = model.invert(settings, epoch, input_samples, e_tolerance=e_tolerance)
    Z_sampleA, Z_sampleB = Zs
    Z_samples = plotting.interpolate(Z_sampleA, Z_sampleB, n_steps=n_steps)
    samples = model.sample_trained_model(settings, epoch, Z_samples.shape[0], Z_samples)
    # get distances from generated samples to target samples
    d_A, d_B = [], []
    for sample in samples:
        d_A.append(sample_distance(sample, samples[0], sigma))
        d_B.append(sample_distance(sample, samples[-1], sigma))
    distances = pd.DataFrame({'dA': d_A, 'dB': d_B})
    plotting.save_plot_interpolate(input_samples, samples, epoch, settings['identifier'] + '_epoch' + str(epoch), distances=distances, sigma=sigma)
    return True
def view_latent_vary(identifier, epoch, n_steps=6):
    """Vary each latent dimension of one latent point in turn; plot the samples."""
    settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r'))
    latent_dim = settings['latent_dim']
    # a single base point in latent space
    Z_base = model.sample_Z(1, settings['seq_length'], latent_dim,
                            settings['use_time'])[0]
    per_dim_samples = []
    for dim_index in range(latent_dim):
        Z_varied = plotting.vary_latent_dimension(Z_base, dim_index, n_steps)
        per_dim_samples.append(model.sample_trained_model(settings, epoch, Z_varied.shape[0], Z_varied))
    plotting.save_plot_vary_dimension(per_dim_samples, epoch, settings['identifier'] + '_varydim', n_dim=latent_dim)
    return True
def view_reconstruction(identifier, epoch, real_samples, tolerance=1):
    """
    Given a set of real samples, find the "closest" latent space points
    corresponding to them, generate samples from these, visualise!
    """
    settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r'))
    Zs, error, sigma = model.invert(settings, epoch, real_samples, tolerance=tolerance)
    # visualise the first two recovered latent codes
    for idx in (0, 1):
        plotting.visualise_latent(Zs[idx], identifier + '_' + str(epoch) + '_' + str(idx))
    reconstructions = model.sample_trained_model(settings, epoch, Zs.shape[0], Zs)
    plotting.save_plot_reconstruct(real_samples, reconstructions, settings['identifier'])
    return True
def view_fixed(identifier, epoch, n_samples=6, dim=None):
    """ What happens when we give the same point at each time step? """
    settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r'))
    Z_samples = model.sample_Z(n_samples, settings['seq_length'], settings['latent_dim'],
                               settings['use_time'])
    # broadcast the t=0 latent values across every later time step
    # (either all dimensions, or just the chosen one)
    if dim is None:
        Z_samples[:, 1:, :] = Z_samples[:, 0:1, :]
    else:
        Z_samples[:, 1:, dim] = Z_samples[:, 0:1, dim]
    # now generate
    samples = model.sample_trained_model(settings, epoch, n_samples, Z_samples)
    # now visualise
    plotting.save_plot_sample(samples, epoch, identifier + '_fixed', n_samples)
    return True
def view_params(identifier, epoch):
    """ Visualise weight matrices in the GAN """
    settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r'))
    tag = identifier + '_' + str(epoch)
    parameters = model.load_parameters(tag)
    plotting.plot_parameters(parameters, tag)
    return True
# --- to do with samples --- #
def sample_distance(sampleA, sampleB, sigma):
    """
    I know this isn't the best distance measure, alright.

    One minus the RBF similarity of the two samples: 0 for identical
    samples, approaching 1 as they grow far apart.
    """
    # RBF!
    gamma = 1 / (2 * sigma**2)
    sq_norm = np.linalg.norm(sampleA - sampleB)**2
    return 1 - np.exp(-gamma * sq_norm)
### --- TSTR ---- ###
def train_CNN(train_X, train_Y, vali_X, vali_Y, test_X):
    """Fit a small convolutional classifier and return its test-set predictions.

    (Code adapted from Cristobal's mnist_keras_trts_0_2.)
    Expects 14x14 single-channel inputs and exactly 3 classes; a channel axis
    is appended to the inputs before training.
    """
    print('Training CNN!')
    batch_size = 128
    num_classes = 3
    epochs = 1000
    net = Sequential()
    net.add(Conv2D(16, kernel_size=(3, 3),
                   activation='relu',
                   input_shape=(14, 14, 1)))
    net.add(Conv2D(32, (3, 3), activation='relu'))
    net.add(MaxPooling2D(pool_size=(2, 2)))
    net.add(Dropout(0.25))
    net.add(Flatten())
    net.add(Dense(128, activation='relu'))
    net.add(Dropout(0.5))
    net.add(Dense(num_classes, activation='softmax'))
    net.compile(loss=keras.losses.categorical_crossentropy,
                optimizer=keras.optimizers.Adadelta(),
                metrics=['accuracy'])
    # stop as soon as validation loss stops improving
    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=0, verbose=1, mode='auto')
    net.fit(np.expand_dims(train_X, axis=-1), train_Y,
            batch_size=batch_size,
            epochs=epochs,
            verbose=1,
            validation_data=(np.expand_dims(vali_X, axis=-1), vali_Y),
            callbacks=[early_stop])
    return net.predict(np.expand_dims(test_X, axis=-1))
def TSTR_mnist(identifier, epoch, generate=True, duplicate_synth=1, vali=True, CNN=False, reverse=False):
    """
    Train-on-Synthetic, Test-on-Real (TSTR) evaluation for MNIST experiments.

    Trains one classifier on synthetic samples drawn from the trained model
    (conditioned on the real training labels) and one on the real training
    set, then scores both on the same real evaluation split and appends the
    results to a CSV report.

    Args:
        identifier: experiment identifier (settings/data file stem).
        epoch: saved model epoch to sample from.
        generate: if True, sample fresh synthetic data and cache it under
            ./experiments/tstr/; otherwise reload the cached file.
        duplicate_synth: number of synthetic copies per real training label.
        vali: evaluate on the validation split instead of the test split.
        CNN: use the Keras CNN instead of a random forest.
        reverse: do TRTS (train on real, test on synthetic) instead.

    Returns:
        (synth_f1, real_f1): weighted F1 of the synthetic-trained and the
        real-trained classifier on the evaluation set.

    Fixes relative to the original version: the real dataset is now loaded
    unconditionally (the cached tstr file only holds synthetic samples), the
    load branch reads the same keys the generate branch saves, and
    `synth_testX` is defined in both branches so reverse=True works either way.
    """
    print('Running TSTR on', identifier, 'at epoch', epoch)
    test_set = 'vali' if vali else 'test'
    # real data is needed whether or not we regenerate the synthetic set
    data = np.load('./experiments/data/' + identifier + '.data.npy').item()
    samples = data['samples']
    train_X = samples['train']
    test_X = samples[test_set]
    labels = data['labels']
    train_Y = labels['train']
    test_Y = labels[test_set]
    if generate:
        # sample synthetic train/test sets from the model, conditioned on labels
        synth_Y = np.tile(train_Y, [duplicate_synth, 1])
        synth_X = model.sample_trained_model(identifier, epoch, num_samples=synth_Y.shape[0], C_samples=synth_Y)
        # for use in TRTS
        synth_testX = model.sample_trained_model(identifier, epoch, num_samples=test_Y.shape[0], C_samples=test_Y)
        synth_data = {'samples': synth_X, 'labels': synth_Y, 'test_samples': synth_testX, 'test_labels': test_Y}
        np.save('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy', synth_data)
    else:
        print('Loading synthetic data from pre-sampled model')
        exp_data = np.load('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy').item()
        # keys must match what the generate branch saves above
        synth_X, synth_Y = exp_data['samples'], exp_data['labels']
        synth_testX = exp_data['test_samples']
    if reverse:
        which_setting = 'trts'
        print('Swapping synthetic test set in for real, to do TRTS!')
        test_X = synth_testX
    else:
        print('Doing normal TSTR')
        which_setting = 'tstr'
    # make classifier
    if not CNN:
        model_choice = 'RF'
        # if multivariate, flatten the (time, signal) axes per sample
        if len(test_X.shape) == 3:
            test_X = test_X.reshape(test_X.shape[0], -1)
        if len(train_X.shape) == 3:
            train_X = train_X.reshape(train_X.shape[0], -1)
        if len(synth_X.shape) == 3:
            synth_X = synth_X.reshape(synth_X.shape[0], -1)
        # if labels are one-hot, convert to class indices
        if len(synth_Y.shape) > 1 and not synth_Y.shape[1] == 1:
            synth_Y = np.argmax(synth_Y, axis=1)
            train_Y = np.argmax(train_Y, axis=1)
            test_Y = np.argmax(test_Y, axis=1)
        synth_classifier = RandomForestClassifier(n_estimators=500)
        real_classifier = RandomForestClassifier(n_estimators=500)
        real_classifier.fit(train_X, train_Y)
        synth_classifier.fit(synth_X, synth_Y)
        # both classifiers are evaluated on the same (real) test set
        synth_predY = synth_classifier.predict(test_X)
        real_predY = real_classifier.predict(test_X)
    else:
        model_choice = 'CNN'
        synth_predY = train_CNN(synth_X, synth_Y, samples['vali'], labels['vali'], test_X)
        clear_session()
        real_predY = train_CNN(train_X, train_Y, samples['vali'], labels['vali'], test_X)
        clear_session()
        # CNN setting is all 'one-hot'; convert everything to class indices
        test_Y = np.argmax(test_Y, axis=1)
        synth_predY = np.argmax(synth_predY, axis=1)
        real_predY = np.argmax(real_predY, axis=1)
    # report on results ('NaN' placeholders keep the CSV schema shared with eICU)
    synth_prec, synth_recall, synth_f1, synth_support = precision_recall_fscore_support(test_Y, synth_predY, average='weighted')
    synth_accuracy = accuracy_score(test_Y, synth_predY)
    synth_scores = [synth_prec, synth_recall, synth_f1, synth_accuracy, 'NaN', 'NaN']
    real_prec, real_recall, real_f1, real_support = precision_recall_fscore_support(test_Y, real_predY, average='weighted')
    real_accuracy = accuracy_score(test_Y, real_predY)
    real_scores = [real_prec, real_recall, real_f1, real_accuracy, 'NaN', 'NaN']
    all_scores = synth_scores + real_scores
    report_name = ('vali.' if vali else '') + which_setting + '_report.v3.csv'
    report_file = open('./experiments/tstr/' + report_name, 'a')
    try:
        report_file.write('mnist,' + identifier + ',' + model_choice + ',' + str(epoch) + ',' + ','.join(map(str, all_scores)) + '\n')
    finally:
        report_file.close()
    # visualise results
    try:
        plotting.view_mnist_eval(identifier + '_' + str(epoch), train_X, train_Y, synth_X, synth_Y, test_X, test_Y, synth_predY, real_predY)
    except ValueError:
        print('PLOTTING ERROR')
        pdb.set_trace()
    print(classification_report(test_Y, synth_predY))
    print(classification_report(test_Y, real_predY))
    return synth_f1, real_f1
def TSTR_eICU(identifier, epoch, generate=True, vali=True, CNN=False, do_OR=False, duplicate_synth=1, reverse=False):
    """
    Train-on-Synthetic, Test-on-Real evaluation for the eICU tasks.

    For each task label, fits one random forest on synthetic samples and one
    on real training data, scores both on the real test/validation split, and
    appends the results to a CSV report.  Returns the mean of
    (AUPRC + AUROC) of the synthetic-trained classifier over the three
    validation tasks ('low_sao2', 'high_heartrate', 'low_respiration').

    NOTE(review): when generate=False the load branch does not define
    `synth_testX`, so reverse=True (TRTS) only works with generate=True;
    the code prints a warning to that effect.  The do_OR path and the CNN
    path are both disabled (they raise before doing anything).
    """
    if vali:
        test_set = 'vali'
    else:
        test_set = 'test'
    # real data: samples and per-task label matrix, split into train/vali/test
    data = np.load('./experiments/data/' + identifier + '.data.npy').item()
    samples = data['samples']
    train_X = samples['train']
    test_X = samples[test_set]
    labels = data['labels']
    train_Y = labels['train']
    test_Y = labels[test_set]
    if generate:
        # now sample from the model, conditioning on (possibly duplicated) real labels
        synth_Y = np.tile(train_Y, [duplicate_synth, 1])
        synth_X = model.sample_trained_model(identifier, epoch, num_samples=synth_Y.shape[0], C_samples=synth_Y)
        # for use in TRTS
        synth_testX = model.sample_trained_model(identifier, epoch, num_samples=test_Y.shape[0], C_samples=test_Y)
        synth_data = {'samples': synth_X, 'labels': synth_Y, 'test_samples': synth_testX, 'test_labels': test_Y}
        np.save('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy', synth_data)
    else:
        print('Loading pre-generated data')
        print('WARNING: not implemented for TRTS')
        # get "train" data: synthetic samples cached by a previous generate=True run
        exp_data = np.load('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy').item()
        synth_X = exp_data['samples']
        synth_Y = exp_data['labels']
        n_synth = synth_X.shape[0]
        synth_X = synth_X.reshape(n_synth, -1)
        # pdb.set_trace()
        # # ALERT ALERT MODIFYING
        # synth_X = 2*(synth_X > 0) - 1
    # task metadata (label column names); NOTE(review): cluster-specific absolute path
    orig_data = np.load('/cluster/home/hyland/eICU_task_data.npy').item()
    if reverse:
        which_setting = 'trts'
        # visualise distribution of errors for train and test
        print('Swapping synthetic test set in for real, to do TRTS!')
        test_X = synth_testX
    else:
        print('Doing normal TSTR')
        which_setting = 'tstr'
    # # get test data
    # test_X = data['test_X']
    # test_Y = data['test_Y']
    if not CNN:
        model_choice = 'RF'
        # if multivariate, reshape to (n_samples, time * signals)
        if len(test_X.shape) == 3:
            test_X = test_X.reshape(test_X.shape[0], -1)
        if len(train_X.shape) == 3:
            train_X = train_X.reshape(train_X.shape[0], -1)
        if len(synth_X.shape) == 3:
            synth_X = synth_X.reshape(synth_X.shape[0], -1)
    else:
        # CNN evaluation is not supported for eICU
        raise ValueError(CNN)
        model_choice = 'CNN'  # NOTE(review): unreachable after the raise above
    # we will select the best validation set epoch based on F1 score, take average across all the tasks
    score_list = []
    for label in range(synth_Y.shape[1]):
        task = orig_data['Y_columns'][label]
        if vali:
            # during validation, only score the three headline tasks
            if not task in ['low_sao2', 'high_heartrate', 'low_respiration']:
                print('Skipping task', task, 'because validation evaluation.')
                continue
        print('Evaluating on task:', task)
        #print('(', np.mean(synth_Y[:, label]), 'positive in train, ', np.mean(test_Y[:, label]), 'in test)')
        #m = RandomForestClassifier(n_estimators=50).fit(synth_X, synth_Y[:, label])
        #m = SVC(gamma=0.001).fit(synth_X, synth_Y[:, label])
        # one binary classifier per task column, for synthetic and real training data
        synth_classifier = RandomForestClassifier(n_estimators=100).fit(synth_X, synth_Y[:, label])
        synth_predY = synth_classifier.predict(test_X)
        synth_predY_prob = synth_classifier.predict_proba(test_X)[:, 1]
        real_classifier = RandomForestClassifier(n_estimators=100).fit(train_X, train_Y[:, label])
        real_predY = real_classifier.predict(test_X)
        real_predY_prob = real_classifier.predict_proba(test_X)[:, 1]
        #print('(predicted', np.mean(predict), 'positive labels)')
        synth_prec, synth_recall, synth_f1, synth_support = precision_recall_fscore_support(test_Y[:, label], synth_predY, average='weighted')
        synth_accuracy = accuracy_score(test_Y[:, label], synth_predY)
        synth_auprc = average_precision_score(test_Y[:, label], synth_predY_prob)
        synth_auroc = roc_auc_score(test_Y[:, label], synth_predY_prob)
        synth_scores = [synth_prec, synth_recall, synth_f1, synth_accuracy, synth_auprc, synth_auroc]
        real_prec, real_recall, real_f1, real_support = precision_recall_fscore_support(test_Y[:, label], real_predY, average='weighted')
        real_accuracy = accuracy_score(test_Y[:, label], real_predY)
        real_auprc = average_precision_score(test_Y[:, label], real_predY_prob)
        real_auroc = roc_auc_score(test_Y[:, label], real_predY_prob)
        real_scores = [real_prec, real_recall, real_f1, real_accuracy, real_auprc, real_auroc]
        all_scores = synth_scores + real_scores
        # append one CSV row per task: synthetic scores then real scores
        if vali:
            report_file = open('./experiments/tstr/vali.' + which_setting + '_report.v3.csv', 'a')
            report_file.write('eICU_' + task + ',' + identifier + ',' + model_choice + ',' + str(epoch) + ',' + ','.join(map(str, all_scores)) + '\n')
            report_file.close()
        else:
            report_file = open('./experiments/tstr/' + which_setting + '_report.v3.csv', 'a')
            report_file.write('eICU_' + task + ',' + identifier + ',' + model_choice + ',' + str(epoch) + ',' + ','.join(map(str, all_scores)) + '\n')
            report_file.close()
        print(classification_report(test_Y[:, label], synth_predY))
        print(classification_report(test_Y[:, label], real_predY))
        if task in ['low_sao2', 'high_heartrate', 'low_respiration']:
            # model-selection score: AUPRC + AUROC of the synthetic-trained classifier
            score_list.append(synth_auprc + synth_auroc)
    if do_OR:
        # OR-of-extremes task is disabled; everything below the raise is dead code
        raise NotImplementedError
        # do the OR task
        extreme_heartrate_test = test_Y[:, 1] + test_Y[:, 4]
        extreme_respiration_test = test_Y[:, 2] + test_Y[:, 5]
        extreme_systemicmean_test = test_Y[:, 3] + test_Y[:, 6]
        Y_OR_test = np.vstack([extreme_heartrate_test, extreme_respiration_test, extreme_systemicmean_test]).T
        Y_OR_test = (Y_OR_test > 0)*1
        extreme_heartrate_synth = synth_Y[:, 1] + synth_Y[:, 4]
        extreme_respiration_synth = synth_Y[:, 2] + synth_Y[:, 5]
        extreme_systemicmean_synth = synth_Y[:, 3] + synth_Y[:, 6]
        Y_OR_synth = np.vstack([extreme_heartrate_synth, extreme_respiration_synth, extreme_systemicmean_synth]).T
        Y_OR_synth = (Y_OR_synth > 0)*1
        OR_names = ['extreme heartrate', 'extreme respiration', 'extreme MAP']
        OR_results = []
        for label in range(Y_OR_synth.shape[1]):
            print('task:', OR_names[label])
            print('(', np.mean(Y_OR_synth[:, label]), 'positive in train, ', np.mean(Y_OR_test[:, label]), 'in test)')
            m = RandomForestClassifier(n_estimators=500).fit(synth_X, Y_OR_synth[:, label])
            # NOTE(review): X_test is undefined here (dead code behind the raise)
            predict = m.predict(X_test)
            print('(predicted', np.mean(predict), 'positive labels)')
            accuracy = accuracy_score(Y_OR_test[:, label], predict)
            precision = sklearn.metrics.precision_score(Y_OR_test[:, label], predict)
            recall = sklearn.metrics.recall_score(Y_OR_test[:, label], predict)
            print(accuracy, precision, recall)
            OR_results.append([accuracy, precision, recall])
    else:
        OR_results = []
    score_across_tasks = np.mean(np.array(score_list))
    return score_across_tasks
def NIPS_toy_plot(identifier_rbf, epoch_rbf, identifier_sine, epoch_sine, identifier_mnist, epoch_mnist):
    """Produce per-sample plots of real and generated examples for the NIPS figure.

    For each of the three experiments the training data is loaded and model
    samples are drawn; currently only the MNIST panels are actually plotted
    (the rbf/sine plotting loops are disabled), each sample in its own PDF so
    they can be merged in Illustrator afterwards.
    """
    n_samples = 15

    def _load(identifier):
        # returns (settings dict, training samples) for one experiment
        settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r'))
        data = np.load('./experiments/data/' + identifier + '.data.npy').item()
        return settings, data['samples']['train']

    settings_rbf, train_rbf = _load(identifier_rbf)
    settings_sine, train_sine = _load(identifier_sine)
    settings_mnist, train_mnist = _load(identifier_mnist)
    # draw samples from all three trained models
    samples_rbf = model.sample_trained_model(settings_rbf, epoch_rbf, n_samples)
    samples_sine = model.sample_trained_model(settings_sine, epoch_sine, n_samples)
    samples_mnist = model.sample_trained_model(settings_mnist, epoch_mnist, n_samples)
    # plot MNIST only; rbf/sine panels are currently disabled
    index = 0
    for sample in np.random.permutation(train_mnist)[:n_samples]:
        plotting.nips_plot_mnist(sample, index, 'train')
        index += 1
    for sample in samples_mnist:
        plotting.nips_plot_mnist(sample, index, 'GAN')
        index += 1
    return True
================================================
FILE: experiments/settings/kdd99.txt
================================================
{
"settings_file": "",
"data": "kdd99",
"seq_length": 30,
"num_signals": 6,
"normalise": false,
"scale": 0.1,
"freq_low": 1.0,
"freq_high": 5.0,
"amplitude_low": 0.1,
"amplitude_high": 0.9,
"multivariate_mnist": false,
"full_mnist": false,
"data_load_from": "",
"resample_rate_in_min": 15,
"hidden_units_g": 100,
"hidden_units_d": 100,
"hidden_units_e": 100,
"kappa": 1,
"latent_dim": 15,
"weight": 0.5,
"degree": 1,
"batch_mean": false,
"learn_scale": false,
"learning_rate": 0.1,
"batch_size": 500,
"num_epochs": 100,
"D_rounds": 1,
"G_rounds": 3,
"E_rounds": 1,
"shuffle": true,
"eval_mul": false,
"eval_an": false,
"eval_single": false,
"wrong_labels": false,
"identifier": "kdd99",
"sub_id": "kdd99",
"dp": false,
"l2norm_bound": 1e-05,
"batches_per_lot": 1,
"dp_sigma": 1e-05,
"use_time": false,
"seq_step": 10,
"num_generated_features": 6
}
================================================
FILE: experiments/settings/kdd99_test.txt
================================================
{
"settings_file": "",
"data": "kdd99_test",
"seq_length": 30,
"num_signals": 6,
"normalise": false,
"scale": 0.1,
"freq_low": 1.0,
"freq_high": 5.0,
"amplitude_low": 0.1,
"amplitude_high": 0.9,
"multivariate_mnist": false,
"full_mnist": false,
"data_load_from": "",
"resample_rate_in_min": 15,
"hidden_units_g": 100,
"hidden_units_d": 100,
"hidden_units_e": 100,
"kappa": 1,
"latent_dim": 15,
"weight": 0.5,
"degree": 1,
"batch_mean": false,
"learn_scale": false,
"learning_rate": 0.1,
"batch_size": 500,
"num_epochs": 100,
"D_rounds": 1,
"G_rounds": 3,
"E_rounds": 1,
"shuffle": true,
"eval_mul": false,
"eval_an": false,
"eval_single": false,
"wrong_labels": false,
"identifier": "kdd99_test",
"sub_id": "kdd99",
"dp": false,
"l2norm_bound": 1e-05,
"batches_per_lot": 1,
"dp_sigma": 1e-05,
"use_time": false,
"seq_step": 10,
"num_generated_features": 6
}
================================================
FILE: mmd.py
================================================
'''
MMD functions implemented in tensorflow.
(from https://github.com/dougalsutherland/opt-mmd/blob/master/gan/mmd.py)
'''
from __future__ import division
import tensorflow as tf
from tf_ops import dot, sq_sum
from scipy.spatial.distance import pdist
from numpy import median, vstack, einsum
import pdb
import numpy as np
_eps=1e-8
################################################################################
### Quadratic-time MMD with Gaussian RBF kernel
def _mix_rbf_kernel(X, Y, sigmas, wts=None):
    """Compute RBF kernel matrices K_XX, K_XY, K_YY for a mixture of bandwidths.

    Args:
        X, Y: 2-D (batch, features) or 3-D (batch, time, features) tensors.
        sigmas: 1-D tensor of RBF bandwidths; kernels are summed over them.
        wts: optional per-bandwidth weights (defaults to 1.0 for each sigma).

    Returns:
        (K_XX, K_XY, K_YY, sum(wts)); the last entry is the constant value on
        the diagonal of K_XX / K_YY, used by the unbiased MMD estimators.
    """
    if wts is None:
        wts = [1.0] * sigmas.get_shape()[0]
    # debug!
    if len(X.shape) == 2:
        # matrix: plain Gram matrices of inner products
        XX = tf.matmul(X, X, transpose_b=True)
        XY = tf.matmul(X, Y, transpose_b=True)
        YY = tf.matmul(Y, Y, transpose_b=True)
    elif len(X.shape) == 3:
        # tensor -- contracting the last two axes gives Frobenius inner products
        XX = tf.tensordot(X, X, axes=[[1, 2], [1, 2]])
        XY = tf.tensordot(X, Y, axes=[[1, 2], [1, 2]])
        YY = tf.tensordot(Y, Y, axes=[[1, 2], [1, 2]])
    else:
        raise ValueError(X)
    X_sqnorms = tf.diag_part(XX)
    Y_sqnorms = tf.diag_part(YY)
    # row / column broadcasting helpers
    r = lambda x: tf.expand_dims(x, 0)
    c = lambda x: tf.expand_dims(x, 1)
    K_XX, K_XY, K_YY = 0, 0, 0
    for sigma, wt in zip(tf.unstack(sigmas, axis=0), wts):
        gamma = 1 / (2 * sigma**2)
        # ||a - b||^2 expanded as -2<a,b> + ||a||^2 + ||b||^2
        K_XX += wt * tf.exp(-gamma * (-2 * XX + c(X_sqnorms) + r(X_sqnorms)))
        K_XY += wt * tf.exp(-gamma * (-2 * XY + c(X_sqnorms) + r(Y_sqnorms)))
        K_YY += wt * tf.exp(-gamma * (-2 * YY + c(Y_sqnorms) + r(Y_sqnorms)))
    return K_XX, K_XY, K_YY, tf.reduce_sum(wts)
def rbf_mmd2(X, Y, sigma=1, biased=True):
    """Single-bandwidth convenience wrapper around mix_rbf_mmd2."""
    bandwidths = [sigma]
    return mix_rbf_mmd2(X, Y, sigmas=bandwidths, biased=biased)
def mix_rbf_mmd2(X, Y, sigmas=(1,), wts=None, biased=True):
    """MMD^2 estimate under a (weighted) mixture of RBF kernels."""
    K_XX, K_XY, K_YY, sum_wts = _mix_rbf_kernel(X, Y, sigmas, wts)
    return _mmd2(K_XX, K_XY, K_YY, const_diagonal=sum_wts, biased=biased)
def rbf_mmd2_and_ratio(X, Y, sigma=1, biased=True):
    """Single-bandwidth wrapper returning (mmd2, variance-normalised ratio)."""
    bandwidths = [sigma]
    return mix_rbf_mmd2_and_ratio(X, Y, sigmas=bandwidths, biased=biased)
def mix_rbf_mmd2_and_ratio(X, Y, sigmas=(1,), wts=None, biased=True):
    """MMD^2 and its variance-normalised ratio under a mixture-RBF kernel."""
    K_XX, K_XY, K_YY, sum_wts = _mix_rbf_kernel(X, Y, sigmas, wts)
    return _mmd2_and_ratio(K_XX, K_XY, K_YY, const_diagonal=sum_wts, biased=biased)
################################################################################
### Helper functions to compute variances based on kernel matrices
def _mmd2(K_XX, K_XY, K_YY, const_diagonal=False, biased=False):
    """MMD^2 estimate from precomputed kernel matrices.

    Args:
        K_XX, K_XY, K_YY: kernel matrices (e.g. from `_mix_rbf_kernel`).
        const_diagonal: if not False, the constant value on the diagonal of
            K_XX / K_YY, letting the unbiased estimator avoid trace ops.
        biased: use the biased V-statistic instead of the unbiased U-statistic.
    """
    m = tf.cast(K_XX.get_shape()[0], tf.float32)
    n = tf.cast(K_YY.get_shape()[0], tf.float32)
    if biased:
        # biased estimator keeps the diagonal terms
        mmd2 = (tf.reduce_sum(K_XX) / (m * m)
                + tf.reduce_sum(K_YY) / (n * n)
                - 2 * tf.reduce_sum(K_XY) / (m * n))
    else:
        # unbiased estimator removes the diagonal (trace) contributions
        if const_diagonal is not False:
            trace_X = m * const_diagonal
            trace_Y = n * const_diagonal
        else:
            trace_X = tf.trace(K_XX)
            trace_Y = tf.trace(K_YY)
        mmd2 = ((tf.reduce_sum(K_XX) - trace_X) / (m * (m - 1))
                + (tf.reduce_sum(K_YY) - trace_Y) / (n * (n - 1))
                - 2 * tf.reduce_sum(K_XY) / (m * n))
    return mmd2
def _mmd2_and_ratio(K_XX, K_XY, K_YY, const_diagonal=False, biased=False,
                    min_var_est=_eps):
    """Return (mmd2, mmd2 / sqrt(max(variance estimate, min_var_est)))."""
    mmd2, var_est = _mmd2_and_variance(
        K_XX, K_XY, K_YY, const_diagonal=const_diagonal, biased=biased)
    # clamp the variance from below so the ratio stays finite
    denom = tf.sqrt(tf.maximum(var_est, min_var_est))
    return mmd2, mmd2 / denom
def _mmd2_and_variance(K_XX, K_XY, K_YY, const_diagonal=False, biased=False):
    """MMD^2 estimate plus an estimate of its variance.

    Args:
        K_XX, K_XY, K_YY: kernel matrices; X and Y must have the same number
            of samples (only K_XX's size m is used for all normalisations).
        const_diagonal: if not False, the constant diagonal value of
            K_XX / K_YY, so diagonal sums can be formed without extracting
            the diagonals.
        biased: use the biased V-statistic for the MMD^2 term.

    Returns:
        (mmd2, var_est) as scalar tensors.
    """
    m = tf.cast(K_XX.get_shape()[0], tf.float32)  # Assumes X, Y are same shape
    ### Get the various sums of kernels that we'll use
    # Kts drop the diagonal, but we don't need to compute them explicitly
    if const_diagonal is not False:
        const_diagonal = tf.cast(const_diagonal, tf.float32)
        diag_X = diag_Y = const_diagonal
        sum_diag_X = sum_diag_Y = m * const_diagonal
        sum_diag2_X = sum_diag2_Y = m * const_diagonal**2
    else:
        diag_X = tf.diag_part(K_XX)
        diag_Y = tf.diag_part(K_YY)
        sum_diag_X = tf.reduce_sum(diag_X)
        sum_diag_Y = tf.reduce_sum(diag_Y)
        sum_diag2_X = sq_sum(diag_X)
        sum_diag2_Y = sq_sum(diag_Y)
    # row sums with the diagonal removed ("tilde" kernel sums)
    Kt_XX_sums = tf.reduce_sum(K_XX, 1) - diag_X
    Kt_YY_sums = tf.reduce_sum(K_YY, 1) - diag_Y
    K_XY_sums_0 = tf.reduce_sum(K_XY, 0)
    K_XY_sums_1 = tf.reduce_sum(K_XY, 1)
    Kt_XX_sum = tf.reduce_sum(Kt_XX_sums)
    Kt_YY_sum = tf.reduce_sum(Kt_YY_sums)
    K_XY_sum = tf.reduce_sum(K_XY_sums_0)
    # sums of squared entries, diagonal removed
    Kt_XX_2_sum = sq_sum(K_XX) - sum_diag2_X
    Kt_YY_2_sum = sq_sum(K_YY) - sum_diag2_Y
    K_XY_2_sum = sq_sum(K_XY)
    if biased:
        mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * m)
                + (Kt_YY_sum + sum_diag_Y) / (m * m)
                - 2 * K_XY_sum / (m * m))
    else:
        mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * (m-1))
                + (Kt_YY_sum + sum_diag_Y) / (m * (m-1))
                - 2 * K_XY_sum / (m * m))
    # variance estimate assembled from the kernel-sum statistics above
    var_est = (
        2 / (m**2 * (m-1)**2) * (
            2 * sq_sum(Kt_XX_sums) - Kt_XX_2_sum
            + 2 * sq_sum(Kt_YY_sums) - Kt_YY_2_sum)
        - (4*m-6) / (m**3 * (m-1)**3) * (Kt_XX_sum**2 + Kt_YY_sum**2)
        + 4*(m-2) / (m**3 * (m-1)**2) * (
            sq_sum(K_XY_sums_1) + sq_sum(K_XY_sums_0))
        - 4 * (m-3) / (m**3 * (m-1)**2) * K_XY_2_sum
        - (8*m - 12) / (m**5 * (m-1)) * K_XY_sum**2
        + 8 / (m**3 * (m-1)) * (
            1/m * (Kt_XX_sum + Kt_YY_sum) * K_XY_sum
            - dot(Kt_XX_sums, K_XY_sums_1)
            - dot(Kt_YY_sums, K_XY_sums_0))
    )
    return mmd2, var_est
### additions from stephanie, for convenience
def median_pairwise_distance(X, Y=None):
    """Median pairwise distance between the rows of X and Y (ndarrays).

    Used as a bandwidth heuristic for the RBF kernel.  If Y is omitted the
    distances are computed within X alone; during training this lets a fixed
    bandwidth be reused while the generated samples change, and at the end
    the heuristic is applied "correctly" with both X and Y.
    For 3-D inputs the distance is the Frobenius norm of the difference.
    """
    if Y is None:
        Y = X  # this is horrendously inefficient, sorry
    if len(X.shape) == 2:
        # matrix: ordinary Euclidean inner products
        X_sqnorms = einsum('...i,...i', X, X)
        Y_sqnorms = einsum('...i,...i', Y, Y)
        XY = einsum('ia,ja', X, Y)
    elif len(X.shape) == 3:
        # tensor: contract the last two axes (Frobenius inner product)
        X_sqnorms = einsum('...ij,...ij', X, X)
        Y_sqnorms = einsum('...ij,...ij', Y, Y)
        XY = einsum('iab,jab', X, Y)
    else:
        raise ValueError(X)
    # ||a - b||^2 = ||a||^2 - 2<a,b> + ||b||^2, broadcast over all pairs
    sq_dists = X_sqnorms.reshape(-1, 1) - 2*XY + Y_sqnorms.reshape(1, -1)
    return median(np.sqrt(sq_dists))
def median_pairwise_distance_o(X, Y=None):
    """Median pairwise distance between X and Y, ignoring NaN entries.

    Same bandwidth heuristic as `median_pairwise_distance`, but the distance
    array is flattened and NaNs (which arise from tiny negative values under
    the square root) are dropped before taking the median.
    X and Y are plain ndarrays, not tensors.
    """
    if Y is None:
        Y = X  # this is horrendously inefficient, sorry
    rank = len(X.shape)
    if rank == 2:
        # matrix: ordinary Euclidean inner products
        X_sqnorms = np.einsum('...i,...i', X, X)
        Y_sqnorms = np.einsum('...i,...i', Y, Y)
        XY = np.einsum('ia,ja', X, Y)
    elif rank == 3:
        # tensor: contract the last two axes (Frobenius inner product)
        X_sqnorms = np.einsum('...ij,...ij', X, X)
        Y_sqnorms = np.einsum('...ij,...ij', Y, Y)
        XY = np.einsum('iab,jab', X, Y)
    else:
        raise ValueError(X)
    distances = np.sqrt(X_sqnorms.reshape(-1, 1) - 2 * XY + Y_sqnorms.reshape(1, -1))
    flat = distances.reshape(-1, 1)
    finite = flat[~np.isnan(flat)]
    return np.median(finite)
================================================
FILE: mod_core_rnn_cell_impl.py
================================================
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#modified by Stephanie (@corcra) to enable initializing the bias term in lstm """
# ==============================================================================
"""Module implementing RNN Cells."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import contextlib
import hashlib
import math
import numbers
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import clip_ops
from tensorflow.python.ops import embedding_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import partitioned_variables
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.ops.math_ops import sigmoid
from tensorflow.python.ops.math_ops import tanh
#from tensorflow.python.ops.rnn_cell_impl import _RNNCell as RNNCell
from tensorflow.python.ops.rnn_cell_impl import RNNCell
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util import nest
_BIAS_VARIABLE_NAME = "biases"
_WEIGHTS_VARIABLE_NAME = "weights"
@contextlib.contextmanager
def _checked_scope(cell, scope, reuse=None, **kwargs):
    """Context manager yielding a variable scope, guarding against accidental
    weight sharing between RNN cells.

    Raises ValueError if `cell` was already used under a different scope, or
    if the scope already contains weights and `reuse` was not requested.
    """
    if reuse is not None:
        kwargs["reuse"] = reuse
    with vs.variable_scope(scope, **kwargs) as checking_scope:
        scope_name = checking_scope.name
        if hasattr(cell, "_scope"):
            # the cell has been called before: its recorded scope must match
            cell_scope = cell._scope  # pylint: disable=protected-access
            if cell_scope.name != checking_scope.name:
                raise ValueError(
                    "Attempt to reuse RNNCell %s with a different variable scope than "
                    "its first use. First use of cell was with scope '%s', this "
                    "attempt is with scope '%s'. Please create a new instance of the "
                    "cell if you would like it to use a different set of weights. "
                    "If before you were using: MultiRNNCell([%s(...)] * num_layers), "
                    "change to: MultiRNNCell([%s(...) for _ in range(num_layers)]). "
                    "If before you were using the same cell instance as both the "
                    "forward and reverse cell of a bidirectional RNN, simply create "
                    "two instances (one for forward, one for reverse). "
                    "In May 2017, we will start transitioning this cell's behavior "
                    "to use existing stored weights, if any, when it is called "
                    "with scope=None (which can lead to silent model degradation, so "
                    "this error will remain until then.)"
                    % (cell, cell_scope.name, scope_name, type(cell).__name__,
                       type(cell).__name__))
        else:
            # first call: refuse to silently adopt pre-existing weights
            weights_found = False
            try:
                with vs.variable_scope(checking_scope, reuse=True):
                    vs.get_variable(_WEIGHTS_VARIABLE_NAME)
                weights_found = True
            except ValueError:
                pass
            if weights_found and reuse is None:
                raise ValueError(
                    "Attempt to have a second RNNCell use the weights of a variable "
                    "scope that already has weights: '%s'; and the cell was not "
                    "constructed as %s(..., reuse=True). "
                    "To share the weights of an RNNCell, simply "
                    "reuse it in your second calculation, or create a new one with "
                    "the argument reuse=True." % (scope_name, type(cell).__name__))
        # Everything is OK. Update the cell's scope and yield it.
        cell._scope = checking_scope  # pylint: disable=protected-access
        yield checking_scope
class BasicRNNCell(RNNCell):
    """The most basic RNN cell."""

    def __init__(self, num_units, input_size=None, activation=tanh, reuse=None):
        # `input_size` is accepted only for backwards compatibility
        if input_size is not None:
            logging.warn("%s: The input_size parameter is deprecated.", self)
        self._num_units = num_units    # state/output dimensionality
        self._activation = activation  # nonlinearity applied to the linear map
        self._reuse = reuse            # variable-scope reuse flag

    @property
    def state_size(self):
        return self._num_units

    @property
    def output_size(self):
        return self._num_units

    def __call__(self, inputs, state, scope=None):
        """Most basic RNN: output = new_state = act(W * input + U * state + B)."""
        with _checked_scope(self, scope or "basic_rnn_cell", reuse=self._reuse):
            # _linear concatenates [inputs, state] and applies one affine map
            output = self._activation(
                _linear([inputs, state], self._num_units, True))
        return output, output
class GRUCell(RNNCell):
    """Gated Recurrent Unit cell (cf. http://arxiv.org/abs/1406.1078)."""

    def __init__(self, num_units, input_size=None, activation=tanh, reuse=None):
        # `input_size` is accepted only for backwards compatibility
        if input_size is not None:
            logging.warn("%s: The input_size parameter is deprecated.", self)
        self._num_units = num_units    # state/output dimensionality
        self._activation = activation  # candidate-state nonlinearity
        self._reuse = reuse            # variable-scope reuse flag

    @property
    def state_size(self):
        return self._num_units

    @property
    def output_size(self):
        return self._num_units

    def __call__(self, inputs, state, scope=None):
        """Gated recurrent unit (GRU) with nunits cells."""
        with _checked_scope(self, scope or "gru_cell", reuse=self._reuse):
            with vs.variable_scope("gates"):  # Reset gate and update gate.
                # We start with bias of 1.0 to not reset and not update.
                value = sigmoid(_linear(
                    [inputs, state], 2 * self._num_units, True, 1.0))
                r, u = array_ops.split(
                    value=value,
                    num_or_size_splits=2,
                    axis=1)
            with vs.variable_scope("candidate"):
                # candidate activation uses the reset-gated previous state
                c = self._activation(_linear([inputs, r * state],
                                             self._num_units, True))
            # new state: convex combination of old state and candidate, gated by u
            new_h = u * state + (1 - u) * c
        return new_h, new_h
_LSTMStateTuple = collections.namedtuple("LSTMStateTuple", ("c", "h"))


class LSTMStateTuple(_LSTMStateTuple):
    """Tuple used by LSTM Cells for `state_size`, `zero_state`, and output state.

    Stores two elements: `(c, h)`, in that order.
    Only used when `state_is_tuple=True`.
    """
    __slots__ = ()

    @property
    def dtype(self):
        """Common dtype of the two state tensors; raises if they disagree."""
        cell_state, hidden_state = self
        if not cell_state.dtype == hidden_state.dtype:
            raise TypeError("Inconsistent internal state: %s vs %s" %
                            (str(cell_state.dtype), str(hidden_state.dtype)))
        return cell_state.dtype
class BasicLSTMCell(RNNCell):
    """Basic LSTM recurrent network cell.
    The implementation is based on: http://arxiv.org/abs/1409.2329.
    We add forget_bias (default: 1) to the biases of the forget gate in order to
    reduce the scale of forgetting in the beginning of the training.
    It does not allow cell clipping, a projection layer, and does not
    use peep-hole connections: it is the basic baseline.
    For advanced models, please use the full LSTMCell that follows.
    """

    def __init__(self, num_units, forget_bias=1.0, input_size=None,
                 state_is_tuple=True, activation=tanh, reuse=None):
        """Initialize the basic LSTM cell.
        Args:
          num_units: int, The number of units in the LSTM cell.
          forget_bias: float, The bias added to forget gates (see above).
          input_size: Deprecated and unused.
          state_is_tuple: If True, accepted and returned states are 2-tuples of
            the `c_state` and `m_state`.  If False, they are concatenated
            along the column axis.  The latter behavior will soon be deprecated.
          activation: Activation function of the inner states.
          reuse: (optional) Python boolean describing whether to reuse variables
            in an existing scope.  If not `True`, and the existing scope already has
            the given variables, an error is raised.
        """
        if not state_is_tuple:
            logging.warn("%s: Using a concatenated state is slower and will soon be "
                         "deprecated.  Use state_is_tuple=True.", self)
        if input_size is not None:
            logging.warn("%s: The input_size parameter is deprecated.", self)
        self._num_units = num_units            # hidden/cell state dimensionality
        self._forget_bias = forget_bias        # added to forget gate pre-activation
        self._state_is_tuple = state_is_tuple  # (c, h) tuple vs concatenated state
        self._activation = activation          # inner-state nonlinearity
        self._reuse = reuse                    # variable-scope reuse flag

    @property
    def state_size(self):
        return (LSTMStateTuple(self._num_units, self._num_units)
                if self._state_is_tuple else 2 * self._num_units)

    @property
    def output_size(self):
        return self._num_units

    def __call__(self, inputs, state, scope=None):
        """Long short-term memory cell (LSTM)."""
        with _checked_scope(self, scope or "basic_lstm_cell", reuse=self._reuse):
            # Parameters of gates are concatenated into one multiply for efficiency.
            if self._state_is_tuple:
                c, h = state
            else:
                c, h = array_ops.split(value=state, num_or_size_splits=2, axis=1)
            concat = _linear([inputs, h], 4 * self._num_units, True)
            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4, axis=1)
            # standard LSTM update; forget_bias shifts f's pre-activation
            new_c = (c * sigmoid(f + self._forget_bias) + sigmoid(i) *
                     self._activation(j))
            new_h = self._activation(new_c) * sigmoid(o)
            if self._state_is_tuple:
                new_state = LSTMStateTuple(new_c, new_h)
            else:
                new_state = array_ops.concat([new_c, new_h], 1)
            return new_h, new_state
class LSTMCell(RNNCell):
  """Long short-term memory unit (LSTM) recurrent network cell.
  The default non-peephole implementation is based on:
  http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf
  S. Hochreiter and J. Schmidhuber.
  "Long Short-Term Memory". Neural Computation, 9(8):1735-1780, 1997.
  The peephole implementation is based on:
  https://research.google.com/pubs/archive/43905.pdf
  Hasim Sak, Andrew Senior, and Francoise Beaufays.
  "Long short-term memory recurrent neural network architectures for
  large scale acoustic modeling." INTERSPEECH, 2014.
  The class uses optional peep-hole connections, optional cell clipping, and
  an optional projection layer.

  NOTE(review): this is the repo's modified copy of TF's LSTMCell — the
  `bias_start` argument is threaded into the fused gate matmul in `__call__`
  so the gate biases can be initialized to a chosen value.
  """

  def __init__(self, num_units, input_size=None,
               use_peepholes=False, cell_clip=None,
               initializer=None, bias_start=0.0, num_proj=None, proj_clip=None,
               num_unit_shards=None, num_proj_shards=None,
               forget_bias=1.0, state_is_tuple=True,
               activation=tanh, reuse=None):
    """Initialize the parameters for an LSTM cell.
    Args:
      num_units: int, The number of units in the LSTM cell
      input_size: Deprecated and unused.
      use_peepholes: bool, set True to enable diagonal/peephole connections.
      cell_clip: (optional) A float value, if provided the cell state is clipped
        by this value prior to the cell output activation.
      initializer: (optional) The initializer to use for the weight and
        projection matrices.
      bias_start: (optional) The VALUE to initialize the bias to, in
        the linear call
      num_proj: (optional) int, The output dimensionality for the projection
        matrices.  If None, no projection is performed.
      proj_clip: (optional) A float value.  If `num_proj > 0` and `proj_clip` is
        provided, then the projected values are clipped elementwise to within
        `[-proj_clip, proj_clip]`.
      num_unit_shards: Deprecated, will be removed by Jan. 2017.
        Use a variable_scope partitioner instead.
      num_proj_shards: Deprecated, will be removed by Jan. 2017.
        Use a variable_scope partitioner instead.
      forget_bias: Biases of the forget gate are initialized by default to 1
        in order to reduce the scale of forgetting at the beginning of
        the training.
      state_is_tuple: If True, accepted and returned states are 2-tuples of
        the `c_state` and `m_state`.  If False, they are concatenated
        along the column axis.  This latter behavior will soon be deprecated.
      activation: Activation function of the inner states.
      reuse: (optional) Python boolean describing whether to reuse variables
        in an existing scope.  If not `True`, and the existing scope already has
        the given variables, an error is raised.
    """
    if not state_is_tuple:
      logging.warn("%s: Using a concatenated state is slower and will soon be "
                   "deprecated.  Use state_is_tuple=True.", self)
    if input_size is not None:
      logging.warn("%s: The input_size parameter is deprecated.", self)
    if num_unit_shards is not None or num_proj_shards is not None:
      logging.warn(
          "%s: The num_unit_shards and proj_unit_shards parameters are "
          "deprecated and will be removed in Jan 2017.  "
          "Use a variable scope with a partitioner instead.", self)
    self._num_units = num_units
    self._use_peepholes = use_peepholes
    self._cell_clip = cell_clip
    self._initializer = initializer
    # Kept so __call__ can seed the bias of the fused gate matmul (the
    # modification this repo makes relative to stock TF).
    self._bias_start = bias_start
    self._num_proj = num_proj
    self._proj_clip = proj_clip
    self._num_unit_shards = num_unit_shards
    self._num_proj_shards = num_proj_shards
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation
    self._reuse = reuse
    # With a projection layer, the recurrent output (m) has width num_proj
    # while the cell state (c) keeps width num_units.
    if num_proj:
      self._state_size = (
          LSTMStateTuple(num_units, num_proj)
          if state_is_tuple else num_units + num_proj)
      self._output_size = num_proj
    else:
      self._state_size = (
          LSTMStateTuple(num_units, num_units)
          if state_is_tuple else 2 * num_units)
      self._output_size = num_units

  @property
  def state_size(self):
    # Either an LSTMStateTuple or an int, decided once in __init__.
    return self._state_size

  @property
  def output_size(self):
    # num_proj when a projection layer is configured, else num_units.
    return self._output_size

  def __call__(self, inputs, state, scope=None):
    """Run one step of LSTM.
    Args:
      inputs: input Tensor, 2D, batch x num_units.
      state: if `state_is_tuple` is False, this must be a state Tensor,
        `2-D, batch x state_size`.  If `state_is_tuple` is True, this must be a
        tuple of state Tensors, both `2-D`, with column sizes `c_state` and
        `m_state`.
      scope: VariableScope for the created subgraph; defaults to "lstm_cell".
    Returns:
      A tuple containing:
      - A `2-D, [batch x output_dim]`, Tensor representing the output of the
        LSTM after reading `inputs` when previous state was `state`.
        Here output_dim is:
           num_proj if num_proj was set,
           num_units otherwise.
      - Tensor(s) representing the new state of LSTM after reading `inputs` when
        the previous state was `state`.  Same type and shape(s) as `state`.
    Raises:
      ValueError: If input size cannot be inferred from inputs via
        static shape inference.
    """
    num_proj = self._num_units if self._num_proj is None else self._num_proj
    # Recover previous cell state (c_prev) and recurrent output (m_prev).
    if self._state_is_tuple:
      (c_prev, m_prev) = state
    else:
      c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
      m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj])
    dtype = inputs.dtype
    # The input depth must be statically known to build the weight matrix.
    input_size = inputs.get_shape().with_rank(2)[1]
    if input_size.value is None:
      raise ValueError("Could not infer input size from inputs.get_shape()[-1]")
    with _checked_scope(self, scope or "lstm_cell",
                        initializer=self._initializer,
                        reuse=self._reuse) as unit_scope:
      if self._num_unit_shards is not None:
        unit_scope.set_partitioner(
            partitioned_variables.fixed_size_partitioner(
                self._num_unit_shards))
      # i = input_gate, j = new_input, f = forget_gate, o = output_gate
      # bias_start seeds the gate biases (repo modification vs. stock TF).
      lstm_matrix = _linear([inputs, m_prev], 4 * self._num_units, bias=True, bias_start=self._bias_start)
      i, j, f, o = array_ops.split(
          value=lstm_matrix, num_or_size_splits=4, axis=1)
      # Diagonal connections
      if self._use_peepholes:
        # Peephole weights must not inherit the unit-matrix partitioner.
        with vs.variable_scope(unit_scope) as projection_scope:
          if self._num_unit_shards is not None:
            projection_scope.set_partitioner(None)
          w_f_diag = vs.get_variable(
              "w_f_diag", shape=[self._num_units], dtype=dtype)
          w_i_diag = vs.get_variable(
              "w_i_diag", shape=[self._num_units], dtype=dtype)
          w_o_diag = vs.get_variable(
              "w_o_diag", shape=[self._num_units], dtype=dtype)
      if self._use_peepholes:
        c = (sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev +
             sigmoid(i + w_i_diag * c_prev) * self._activation(j))
      else:
        c = (sigmoid(f + self._forget_bias) * c_prev + sigmoid(i) *
             self._activation(j))
      if self._cell_clip is not None:
        # pylint: disable=invalid-unary-operand-type
        c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
        # pylint: enable=invalid-unary-operand-type
      if self._use_peepholes:
        m = sigmoid(o + w_o_diag * c) * self._activation(c)
      else:
        m = sigmoid(o) * self._activation(c)
      if self._num_proj is not None:
        with vs.variable_scope("projection") as proj_scope:
          if self._num_proj_shards is not None:
            proj_scope.set_partitioner(
                partitioned_variables.fixed_size_partitioner(
                    self._num_proj_shards))
          # Linear projection of the output, optionally clipped.
          m = _linear(m, self._num_proj, bias=False)
          if self._proj_clip is not None:
            # pylint: disable=invalid-unary-operand-type
            m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip)
            # pylint: enable=invalid-unary-operand-type
    new_state = (LSTMStateTuple(c, m) if self._state_is_tuple else
                 array_ops.concat([c, m], 1))
    return m, new_state
class OutputProjectionWrapper(RNNCell):
  """RNNCell wrapper that applies a learned linear projection to the output.

  Note: in many cases it may be more efficient to not use this wrapper,
  but instead concatenate the whole sequence of your outputs in time,
  do the projection on this batch-concatenated sequence, then split it
  if needed or directly feed into a softmax.
  """

  def __init__(self, cell, output_size, reuse=None):
    """Wrap `cell` so its per-step output is projected to `output_size`.

    Args:
      cell: an RNNCell, a projection to output_size is added to it.
      output_size: integer, the size of the output after projection.
      reuse: (optional) Python boolean describing whether to reuse variables
        in an existing scope.  If not `True`, and the existing scope already
        has the given variables, an error is raised.

    Raises:
      TypeError: if cell is not an RNNCell.
      ValueError: if output_size is not positive.
    """
    if not isinstance(cell, RNNCell):
      raise TypeError("The parameter cell is not RNNCell.")
    if output_size < 1:
      raise ValueError("Parameter output_size must be > 0: %d." % output_size)
    self._cell = cell
    self._output_size = output_size
    self._reuse = reuse

  @property
  def state_size(self):
    # Projection does not touch the state; delegate to the wrapped cell.
    return self._cell.state_size

  @property
  def output_size(self):
    return self._output_size

  def zero_state(self, batch_size, dtype):
    with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]):
      return self._cell.zero_state(batch_size, dtype)

  def __call__(self, inputs, state, scope=None):
    """Run the cell and output projection on inputs, starting from state."""
    cell_output, next_state = self._cell(inputs, state)
    # Default scope: "OutputProjectionWrapper"
    with _checked_scope(self, scope or "output_projection_wrapper",
                        reuse=self._reuse):
      projected_output = _linear(cell_output, self._output_size, True)
    return projected_output, next_state
class InputProjectionWrapper(RNNCell):
  """RNNCell wrapper that linearly projects the inputs before the cell.

  Note: in many cases it may be more efficient to not use this wrapper,
  but instead concatenate the whole sequence of your inputs in time,
  do the projection on this batch-concatenated sequence, then split it.
  """

  def __init__(self, cell, num_proj, input_size=None):
    """Wrap `cell` so its inputs are first projected to `num_proj` dims.

    Args:
      cell: an RNNCell, a projection of inputs is added before it.
      num_proj: Python integer.  The dimension to project to.
      input_size: Deprecated and unused.

    Raises:
      TypeError: if cell is not an RNNCell.
    """
    if input_size is not None:
      logging.warn("%s: The input_size parameter is deprecated.", self)
    if not isinstance(cell, RNNCell):
      raise TypeError("The parameter cell is not RNNCell.")
    self._cell = cell
    self._num_proj = num_proj

  @property
  def state_size(self):
    # Input projection leaves the state untouched; delegate to the cell.
    return self._cell.state_size

  @property
  def output_size(self):
    return self._cell.output_size

  def zero_state(self, batch_size, dtype):
    with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]):
      return self._cell.zero_state(batch_size, dtype)

  def __call__(self, inputs, state, scope=None):
    """Run the input projection and then the cell."""
    # Default scope: "InputProjectionWrapper"
    with vs.variable_scope(scope or "input_projection_wrapper"):
      projected_inputs = _linear(inputs, self._num_proj, True)
    return self._cell(projected_inputs, state)
def _enumerated_map_structure(map_fn, *args, **kwargs):
  """Like `nest.map_structure`, but prepends a running element index.

  `map_fn` is called as `map_fn(index, *elements)` with index 0, 1, 2, ...
  following the flattened traversal order of the structure.
  """
  # Mutable one-element list so the closure can advance the counter
  # (keeps the function Python-2 compatible, unlike `nonlocal`).
  counter = [0]

  def _indexed_fn(*fn_args, **fn_kwargs):
    result = map_fn(counter[0], *fn_args, **fn_kwargs)
    counter[0] += 1
    return result

  return nest.map_structure(_indexed_fn, *args, **kwargs)
class DropoutWrapper(RNNCell):
"""Operator adding dropout to inputs and outputs of the given cell."""
def __init__(self, cell, input_keep_prob=1.0, output_keep_prob=1.0,
state_keep_prob=1.0, variational_recurrent=False,
input_size=None, dtype=None, seed=None):
"""Create a cell with added input, state, and/or output dropout.
If `variational_recurrent` is set to `True` (**NOT** the default behavior),
then the the same dropout mask is applied at every step, as described in:
Y. Gal, Z Ghahramani. "A Theoretically Grounded Application of Dropout in
Recurrent Neural Networks". https://arxiv.org/abs/1512.05287
Otherwise a different dropout mask is applied at every time step.
Args:
cell: an RNNCell, a projection to output_size is added to it.
input_keep_prob: unit Tensor or float between 0 and 1, input keep
probability; if it is constant and 1, no input dropout will be added.
output_keep_prob: unit Tensor or float between 0 and 1, output keep
probability; if it is constant and 1, no output dropout will be added.
state_keep_prob: unit Tensor or float between 0 and 1, output keep
probability; if it is constant and 1, no output dropout will be added.
State dropout is performed on the *output* states of the cell.
variational_recurrent: Python bool. If `True`, then the same
dropout pattern is applied across all time steps per run call.
If this parameter is set, `input_size` **must** be provided.
input_size: (optional) (possibly nested tuple of) `TensorShape` objects
containing the depth(s) of the input tensors expected to be passed in to
the `DropoutWrapper`. Required and used **iff**
`variational_recurrent = True` and `input_keep_prob < 1`.
dtype: (optional) The `dtype` of the input, state, and output tensors.
Required and used **iff** `variational_recurrent = True`.
seed: (optional) integer, the randomness seed.
Raises:
TypeError: if cell is not an RNNCell.
ValueError: if any of the keep_probs are not between 0 and 1.
"""
if not isinstance(cell, RNNCell):
raise TypeError("The parameter cell is not a RNNCell.")
with ops.name_scope("DropoutWrapperInit"):
def tensor_and_const_value(v):
tensor_value = ops.convert_to_tensor(v)
const_value = tensor_util.constant_value(tensor_value)
return (tensor_value, const_value)
for prob, attr in [(input_keep_prob, "input_keep_prob"),
(state_keep_prob, "state_keep_prob"),
(output_keep_prob, "output_keep_prob")]:
tensor_prob, const_prob = tensor_and_const_value(prob)
if const_prob is not None:
if const_prob < 0 or const_prob > 1:
raise ValueError("Parameter %s must be between 0 and 1: %d"
% (attr, const_prob))
setattr(self, "_%s" % attr, float(const_prob))
else:
setattr(self, "_%s" % attr, tensor_prob)
# Set cell, variational_recurrent, seed before running the code below
self._cell = cell
self._variational_recurrent = variational_recurrent
self._seed = seed
self._recurrent_input_noise = None
self._recurrent_state_noise = None
self._recurrent_output_noise = None
if variational_recurrent:
if dtype is None:
raise ValueError(
"When variational_recurrent=True, dtype must be provided")
def convert_to_batch_shape(s):
# Prepend a 1 for the batch dimension; for recurrent
# variational dropout we use the same dropout mask for all
# batch elements.
return array_ops.concat(
([1], tensor_shape.TensorShape(s).as_list()), 0)
def batch_noise(s, inner_seed):
shape = convert_to_batch_shape(s)
return random_ops.random_uniform(shape, seed=inner_seed, dtype=dtype)
if (not isinstance(self._input_keep_prob, numbers.Real) or
self._input_keep_prob < 1.0):
if input_size is None:
raise ValueError(
"When variational_recurrent=True and input_keep_prob < 1.0 or "
"is unknown, input_size must be provided")
self._recurrent_input_noise = _enumerated_map_structure(
lambda i, s: batch_noise(s, inner_seed=self._gen_seed("input", i)),
input_size)
self._recurrent_state_noise = _enumerated_map_structure(
lambda i, s: batch_noise(s, inner_seed=self._gen_seed("state", i)),
cell.state_size)
self._recurrent_output_noise = _enumerated_map_str
gitextract_fejrc7rh/ ├── .gitattributes ├── AD.py ├── AD_Invert.py ├── DR_discriminator.py ├── README.md ├── RGAN.py ├── data_utils.py ├── differential_privacy/ │ ├── dp_sgd/ │ │ └── dp_optimizer/ │ │ ├── dp_optimizer.py │ │ ├── sanitizer.py │ │ └── utils.py │ └── privacy_accountant/ │ └── tf/ │ └── accountant.py ├── eugenium_mmd.py ├── eval.py ├── experiments/ │ ├── parameters/ │ │ ├── kdd99_30_0.npy │ │ ├── kdd99_30_1.npy │ │ ├── kdd99_30_10.npy │ │ ├── kdd99_30_11.npy │ │ ├── kdd99_30_12.npy │ │ ├── kdd99_30_13.npy │ │ ├── kdd99_30_14.npy │ │ ├── kdd99_30_15.npy │ │ ├── kdd99_30_16.npy │ │ ├── kdd99_30_17.npy │ │ ├── kdd99_30_18.npy │ │ ├── kdd99_30_19.npy │ │ ├── kdd99_30_2.npy │ │ ├── kdd99_30_20.npy │ │ ├── kdd99_30_21.npy │ │ ├── kdd99_30_22.npy │ │ ├── kdd99_30_3.npy │ │ ├── kdd99_30_4.npy │ │ ├── kdd99_30_5.npy │ │ ├── kdd99_30_6.npy │ │ ├── kdd99_30_7.npy │ │ ├── kdd99_30_8.npy │ │ └── kdd99_30_9.npy │ ├── plots/ │ │ └── gs/ │ │ └── kdd99_gs_real.npy │ └── settings/ │ ├── kdd99.txt │ └── kdd99_test.txt ├── mmd.py ├── mod_core_rnn_cell_impl.py ├── model.py ├── plotting.py ├── tf_ops.py └── utils.py
SYMBOL INDEX (216 symbols across 16 files)
FILE: AD.py
class myADclass (line 49) | class myADclass():
method __init__ (line 50) | def __init__(self, epoch, settings=settings, samples=samples, labels=l...
method ADfunc (line 56) | def ADfunc(self):
FILE: AD_Invert.py
class myADclass (line 45) | class myADclass():
method __init__ (line 46) | def __init__(self, epoch, settings=settings, samples=samples, labels=l...
method ADfunc (line 52) | def ADfunc(self):
FILE: DR_discriminator.py
function anomaly_detection_plot (line 10) | def anomaly_detection_plot(D_test, T_mb, L_mb, D_L, epoch, identifier):
function detection_Comb (line 40) | def detection_Comb(Label_test, L_mb, I_mb, seq_step, tao):
function detection_logits_I (line 87) | def detection_logits_I(DL_test, L_mb, I_mb, seq_step, tao):
function detection_statistic_I (line 155) | def detection_statistic_I(D_test, L_mb, I_mb, seq_step, tao):
function detection_D_I (line 222) | def detection_D_I(DD, L_mb, I_mb, seq_step, tao):
function detection_R_D_I (line 290) | def detection_R_D_I(DD, Gs, T_mb, L_mb, seq_step, tao, lam):
function detection_R_I (line 364) | def detection_R_I(Gs, T_mb, L_mb, seq_step, tao):
function sample_detection (line 432) | def sample_detection(D_test, L_mb, tao):
function CUSUM_det (line 464) | def CUSUM_det(spe_n, spe_a, labels):
function SPE (line 531) | def SPE(X, pc):
function generator_o (line 549) | def generator_o(z, hidden_units_g, seq_length, batch_size, num_generated...
function discriminator_o (line 589) | def discriminator_o(x, hidden_units_d, reuse=False, parameters=None):
function invert (line 615) | def invert(settings, samples, para_path, g_tolerance=None, e_tolerance=0.1,
function dis_trained_model (line 722) | def dis_trained_model(settings, samples, para_path):
function dis_D_model (line 761) | def dis_D_model(settings, samples, para_path):
FILE: data_utils.py
function swat (line 24) | def swat(seq_length, seq_step, num_signals, randomize=False):
function swat_birgan (line 90) | def swat_birgan(seq_length, seq_step, num_signals, randomize=False):
function swat_test (line 133) | def swat_test(seq_length, seq_step, num_signals, randomize=False):
function swat_birgan_test (line 200) | def swat_birgan_test(seq_length, seq_step, num_signals, randomize=False):
function wadi (line 245) | def wadi(seq_length, seq_step, num_signals, randomize=False):
function wadi_test (line 302) | def wadi_test(seq_length, seq_step, num_signals, randomize=False):
function kdd99 (line 357) | def kdd99(seq_length, seq_step, num_signals):
function kdd99_test (line 409) | def kdd99_test(seq_length, seq_step, num_signals):
function get_samples_and_labels (line 468) | def get_samples_and_labels(settings):
function get_data (line 544) | def get_data(data_type, seq_length, seq_step, num_signals, sub_id, eval_...
function get_batch (line 574) | def get_batch(samples, batch_size, batch_idx, labels=None):
function split (line 589) | def split(samples, proportions, normalise=False, scale=False, labels=Non...
FILE: differential_privacy/dp_sgd/dp_optimizer/dp_optimizer.py
class DPGradientDescentOptimizer (line 30) | class DPGradientDescentOptimizer(tf.train.GradientDescentOptimizer):
method __init__ (line 34) | def __init__(self, learning_rate, eps_delta, sanitizer,
method compute_sanitized_gradients (line 72) | def compute_sanitized_gradients(self, loss, var_list=None,
method minimize (line 110) | def minimize(self, loss, global_step=None, var_list=None,
FILE: differential_privacy/dp_sgd/dp_optimizer/sanitizer.py
class AmortizedGaussianSanitizer (line 39) | class AmortizedGaussianSanitizer(object):
method __init__ (line 47) | def __init__(self, accountant, default_option):
method set_option (line 59) | def set_option(self, tensor_name, option):
method sanitize (line 69) | def sanitize(self, x, eps_delta, sigma=None,
FILE: differential_privacy/dp_sgd/dp_optimizer/utils.py
class LayerParameters (line 29) | class LayerParameters(object):
method __init__ (line 31) | def __init__(self):
class ConvParameters (line 42) | class ConvParameters(object):
method __init__ (line 44) | def __init__(self):
class NetworkParameters (line 62) | class NetworkParameters(object):
method __init__ (line 64) | def __init__(self):
function GetTensorOpName (line 73) | def GetTensorOpName(x):
function BuildNetwork (line 91) | def BuildNetwork(inputs, network_parameters):
function VaryRate (line 199) | def VaryRate(start, end, saturate_epochs, epoch):
function BatchClipByL2norm (line 223) | def BatchClipByL2norm(t, upper_bound, name=None):
function SoftThreshold (line 254) | def SoftThreshold(t, threshold_ratio, name=None):
function AddGaussianNoise (line 283) | def AddGaussianNoise(t, sigma, name=None):
function GenerateBinomialTable (line 300) | def GenerateBinomialTable(m):
FILE: differential_privacy/privacy_accountant/tf/accountant.py
class AmortizedAccountant (line 50) | class AmortizedAccountant(object):
method __init__ (line 59) | def __init__(self, total_examples):
method accumulate_privacy_spending (line 73) | def accumulate_privacy_spending(self, eps_delta, unused_sigma,
method get_privacy_spent (line 108) | def get_privacy_spent(self, sess, target_eps=None):
class MomentsAccountant (line 127) | class MomentsAccountant(object):
method __init__ (line 179) | def __init__(self, total_examples, moment_orders=32):
method _compute_log_moment (line 200) | def _compute_log_moment(self, sigma, q, moment_order):
method accumulate_privacy_spending (line 212) | def accumulate_privacy_spending(self, unused_eps_delta,
method _compute_delta (line 241) | def _compute_delta(self, log_moments, eps):
method _compute_eps (line 261) | def _compute_eps(self, log_moments, delta):
method get_privacy_spent (line 270) | def get_privacy_spent(self, sess, target_eps=None, target_deltas=None):
class GaussianMomentsAccountant (line 299) | class GaussianMomentsAccountant(MomentsAccountant):
method __init__ (line 332) | def __init__(self, total_examples, moment_orders=32):
method _differential_moments (line 342) | def _differential_moments(self, sigma, s, t):
method _compute_log_moment (line 379) | def _compute_log_moment(self, sigma, q, moment_order):
class DummyAccountant (line 407) | class DummyAccountant(object):
method accumulate_privacy_spending (line 410) | def accumulate_privacy_spending(self, *unused_args):
method get_privacy_spent (line 413) | def get_privacy_spent(self, unused_sess, **unused_kwargs):
FILE: eugenium_mmd.py
function my_kernel (line 21) | def my_kernel(X, Y, sigma):
function MMD_3_Sample_Test (line 36) | def MMD_3_Sample_Test(X, Y, Z, sigma=-1, SelectSigma=True, computeMMDs=F...
function MMD_Diff_Var (line 92) | def MMD_Diff_Var(Kyy, Kzz, Kxy, Kxz):
function grbf (line 151) | def grbf(x1, x2, sigma):
function kernelwidthPair (line 174) | def kernelwidthPair(x1, x2):
function kernelwidth (line 204) | def kernelwidth(Zmed):
function MMD_unbiased (line 229) | def MMD_unbiased(Kxx, Kyy, Kxy):
FILE: eval.py
function assert_same_data (line 33) | def assert_same_data(A, B):
function model_memorisation (line 48) | def model_memorisation(identifier, epoch, max_samples=2000, tstr=False):
function model_comparison (line 128) | def model_comparison(identifier_A, identifier_B, epoch_A=99, epoch_B=99):
function get_reconstruction_errors (line 160) | def get_reconstruction_errors(identifier, epoch, g_tolerance=0.05, max_s...
function error_per_sample (line 261) | def error_per_sample(identifier, epoch, samples, n_rep=3, n_iter=None, g...
function view_digit (line 281) | def view_digit(identifier, epoch, digit, n_samples=6):
function view_interpolation (line 301) | def view_interpolation(identifier, epoch, n_steps=6, input_samples=None,...
function view_latent_vary (line 332) | def view_latent_vary(identifier, epoch, n_steps=6):
function view_reconstruction (line 343) | def view_reconstruction(identifier, epoch, real_samples, tolerance=1):
function view_fixed (line 356) | def view_fixed(identifier, epoch, n_samples=6, dim=None):
function view_params (line 373) | def view_params(identifier, epoch):
function sample_distance (line 382) | def sample_distance(sampleA, sampleB, sigma):
function train_CNN (line 394) | def train_CNN(train_X, train_Y, vali_X, vali_Y, test_X):
function TSTR_mnist (line 432) | def TSTR_mnist(identifier, epoch, generate=True, duplicate_synth=1, vali...
function TSTR_eICU (line 542) | def TSTR_eICU(identifier, epoch, generate=True, vali=True, CNN=False, do...
function NIPS_toy_plot (line 682) | def NIPS_toy_plot(identifier_rbf, epoch_rbf, identifier_sine, epoch_sine...
FILE: mmd.py
function _mix_rbf_kernel (line 21) | def _mix_rbf_kernel(X, Y, sigmas, wts=None):
function rbf_mmd2 (line 57) | def rbf_mmd2(X, Y, sigma=1, biased=True):
function mix_rbf_mmd2 (line 61) | def mix_rbf_mmd2(X, Y, sigmas=(1,), wts=None, biased=True):
function rbf_mmd2_and_ratio (line 66) | def rbf_mmd2_and_ratio(X, Y, sigma=1, biased=True):
function mix_rbf_mmd2_and_ratio (line 70) | def mix_rbf_mmd2_and_ratio(X, Y, sigmas=(1,), wts=None, biased=True):
function _mmd2 (line 79) | def _mmd2(K_XX, K_XY, K_YY, const_diagonal=False, biased=False):
function _mmd2_and_ratio (line 102) | def _mmd2_and_ratio(K_XX, K_XY, K_YY, const_diagonal=False, biased=False,
function _mmd2_and_variance (line 110) | def _mmd2_and_variance(K_XX, K_XY, K_YY, const_diagonal=False, biased=Fa...
function median_pairwise_distance (line 172) | def median_pairwise_distance(X, Y=None):
function median_pairwise_distance_o (line 203) | def median_pairwise_distance_o(X, Y=None):
FILE: mod_core_rnn_cell_impl.py
function _checked_scope (line 57) | def _checked_scope(cell, scope, reuse=None, **kwargs):
class BasicRNNCell (line 103) | class BasicRNNCell(RNNCell):
method __init__ (line 106) | def __init__(self, num_units, input_size=None, activation=tanh, reuse=...
method state_size (line 114) | def state_size(self):
method output_size (line 118) | def output_size(self):
method __call__ (line 121) | def __call__(self, inputs, state, scope=None):
class GRUCell (line 129) | class GRUCell(RNNCell):
method __init__ (line 132) | def __init__(self, num_units, input_size=None, activation=tanh, reuse=...
method state_size (line 140) | def state_size(self):
method output_size (line 144) | def output_size(self):
method __call__ (line 147) | def __call__(self, inputs, state, scope=None):
class LSTMStateTuple (line 168) | class LSTMStateTuple(_LSTMStateTuple):
method dtype (line 178) | def dtype(self):
class BasicLSTMCell (line 186) | class BasicLSTMCell(RNNCell):
method __init__ (line 200) | def __init__(self, num_units, forget_bias=1.0, input_size=None,
method state_size (line 228) | def state_size(self):
method output_size (line 233) | def output_size(self):
method __call__ (line 236) | def __call__(self, inputs, state, scope=None):
class LSTMCell (line 260) | class LSTMCell(RNNCell):
method __init__ (line 282) | def __init__(self, num_units, input_size=None,
method state_size (line 357) | def state_size(self):
method output_size (line 361) | def output_size(self):
method __call__ (line 364) | def __call__(self, inputs, state, scope=None):
class OutputProjectionWrapper (line 459) | class OutputProjectionWrapper(RNNCell):
method __init__ (line 468) | def __init__(self, cell, output_size, reuse=None):
method state_size (line 491) | def state_size(self):
method output_size (line 495) | def output_size(self):
method zero_state (line 498) | def zero_state(self, batch_size, dtype):
method __call__ (line 502) | def __call__(self, inputs, state, scope=None):
class InputProjectionWrapper (line 512) | class InputProjectionWrapper(RNNCell):
method __init__ (line 520) | def __init__(self, cell, num_proj, input_size=None):
method state_size (line 539) | def state_size(self):
method output_size (line 543) | def output_size(self):
method zero_state (line 546) | def zero_state(self, batch_size, dtype):
method __call__ (line 550) | def __call__(self, inputs, state, scope=None):
function _enumerated_map_structure (line 558) | def _enumerated_map_structure(map_fn, *args, **kwargs):
class DropoutWrapper (line 567) | class DropoutWrapper(RNNCell):
method __init__ (line 570) | def __init__(self, cell, input_keep_prob=1.0, output_keep_prob=1.0,
method _gen_seed (line 667) | def _gen_seed(self, salt_prefix, index):
method state_size (line 675) | def state_size(self):
method output_size (line 679) | def output_size(self):
method zero_state (line 682) | def zero_state(self, batch_size, dtype):
method _variational_recurrent_dropout_value (line 686) | def _variational_recurrent_dropout_value(
method _dropout (line 698) | def _dropout(self, values, salt_prefix, recurrent_noise, keep_prob):
method __call__ (line 710) | def __call__(self, inputs, state, scope=None):
class ResidualWrapper (line 731) | class ResidualWrapper(RNNCell):
method __init__ (line 734) | def __init__(self, cell):
method state_size (line 743) | def state_size(self):
method output_size (line 747) | def output_size(self):
method zero_state (line 750) | def zero_state(self, batch_size, dtype):
method __call__ (line 754) | def __call__(self, inputs, state, scope=None):
class DeviceWrapper (line 780) | class DeviceWrapper(RNNCell):
method __init__ (line 783) | def __init__(self, cell, device):
method state_size (line 796) | def state_size(self):
method output_size (line 800) | def output_size(self):
method zero_state (line 803) | def zero_state(self, batch_size, dtype):
method __call__ (line 807) | def __call__(self, inputs, state, scope=None):
class EmbeddingWrapper (line 813) | class EmbeddingWrapper(RNNCell):
method __init__ (line 822) | def __init__(self, cell, embedding_classes, embedding_size, initialize...
method state_size (line 852) | def state_size(self):
method output_size (line 856) | def output_size(self):
method zero_state (line 859) | def zero_state(self, batch_size, dtype):
method __call__ (line 863) | def __call__(self, inputs, state, scope=None):
class MultiRNNCell (line 890) | class MultiRNNCell(RNNCell):
method __init__ (line 893) | def __init__(self, cells, state_is_tuple=True):
method state_size (line 922) | def state_size(self):
method output_size (line 929) | def output_size(self):
method zero_state (line 932) | def zero_state(self, batch_size, dtype):
method __call__ (line 941) | def __call__(self, inputs, state, scope=None):
class _SlimRNNCell (line 966) | class _SlimRNNCell(RNNCell):
method __init__ (line 969) | def __init__(self, cell_fn):
method state_size (line 998) | def state_size(self):
method output_size (line 1002) | def output_size(self):
method __call__ (line 1005) | def __call__(self, inputs, state, scope=None):
function _linear (line 1011) | def _linear(args, output_size, bias, bias_start=0.0, scope=None):
FILE: model.py
function sample_Z (line 25) | def sample_Z(batch_size, seq_length, latent_dim, use_time=False, use_noi...
function sample_T (line 34) | def sample_T(batch_size, batch_idx):
function sample_TT (line 46) | def sample_TT(batch_size):
function train_epoch (line 57) | def train_epoch(epoch, samples, labels, sess, Z, X, D_loss, G_loss, D_so...
function GAN_loss (line 88) | def GAN_loss(Z, X, generator_settings, discriminator_settings):
function GAN_solvers (line 114) | def GAN_solvers(D_loss, G_loss, learning_rate, batch_size, total_example...
function create_placeholders (line 150) | def create_placeholders(batch_size, seq_length, latent_dim, num_signals):
function generator (line 157) | def generator(z, hidden_units_g, seq_length, batch_size, num_signals, re...
function discriminator (line 209) | def discriminator(x, hidden_units_d, seq_length, batch_size, reuse=False...
function display_batch_progression (line 250) | def display_batch_progression(j, id_max):
function dump_parameters (line 261) | def dump_parameters(identifier, sess):
function load_parameters (line 275) | def load_parameters(identifier):
FILE: plotting.py
function plot_label (line 10) | def plot_label(label, id):
function visualise_at_epoch (line 20) | def visualise_at_epoch(vis_sample, data, predict_labels, epoch,
function save_plot_sample (line 48) | def save_plot_sample(samples, idx, identifier, n_samples=16, num_epochs=...
function save_plot_interpolate (line 77) | def save_plot_interpolate(input_samples, samples, idx, identifier, num_...
function reconstruction_errors (line 130) | def reconstruction_errors(identifier, train_errors, vali_errors,
function save_plot_reconstruct (line 158) | def save_plot_reconstruct(real_samples, model_samples, identifier):
function save_plot_vary_dimension (line 180) | def save_plot_vary_dimension(samples_list, idx, identifier, n_dim):
function interpolate (line 213) | def interpolate(sampleA, sampleB=None, n_steps=6):
function vary_latent_dimension (line 224) | def vary_latent_dimension(sample, dimension, n_steps=6):
function plot_sine_evaluation (line 235) | def plot_sine_evaluation(real_samples, fake_samples, idx, identifier):
function plot_trace (line 270) | def plot_trace(identifier, xmax=250, final=False, dp=False):
function save_samples (line 386) | def save_samples(vis_sample, identifier, epoch):
function save_samples_real (line 392) | def save_samples_real(vis_real, identifier):
function save_mnist_plot_sample (line 398) | def save_mnist_plot_sample(samples, idx, identifier, n_samples, labels=N...
function visualise_latent (line 436) | def visualise_latent(Z, identifier):
function plot_parameters (line 456) | def plot_parameters(parameters, identifier):
function view_mnist_eval (line 491) | def view_mnist_eval(identifier, train_X, train_Y, synth_X, synth_Y, test...
function nips_plot_rbf (line 544) | def nips_plot_rbf(sample, index, which='train'):
function nips_plot_sine (line 570) | def nips_plot_sine(sample, index, which='train'):
function nips_plot_mnist (line 599) | def nips_plot_mnist(sample, index, which='train'):
FILE: tf_ops.py
function sq_sum (line 5) | def sq_sum(t, name=None):
function dot (line 12) | def dot(x, y, name=None):
FILE: utils.py
function rgan_options_parser (line 6) | def rgan_options_parser():
function load_settings_from_file (line 93) | def load_settings_from_file(settings):
Condensed preview — 45 files, each showing its path, character count, and a content snippet. Download the .json file or copy the output to get the full structured content (265K chars).
[
{
"path": ".gitattributes",
"chars": 66,
"preview": "# Auto detect text files and perform LF normalization\n* text=auto\n"
},
{
"path": "AD.py",
"chars": 6507,
"preview": "import tensorflow as tf\r\nimport numpy as np\r\nimport pdb\r\nimport json\r\nimport model\r\nfrom mod_core_rnn_cell_impl import L"
},
{
"path": "AD_Invert.py",
"chars": 7849,
"preview": "import tensorflow as tf\r\nimport numpy as np\r\nimport pdb\r\nimport json\r\nfrom mod_core_rnn_cell_impl import LSTMCell # mod"
},
{
"path": "DR_discriminator.py",
"chars": 25532,
"preview": "import numpy as np\nimport tensorflow as tf\nimport matplotlib.pyplot as plt\nfrom matplotlib.colors import hsv_to_rgb\nimpo"
},
{
"path": "README.md",
"chars": 1790,
"preview": "# -- Multivariate Anomaly Detection for Time Series Data with GANs -- #\n\n# MAD-GAN\n\nThis repository contains code for th"
},
{
"path": "RGAN.py",
"chars": 8594,
"preview": "import numpy as np\r\nimport tensorflow as tf\r\nimport pdb\r\nimport random\r\nimport json\r\nfrom scipy.stats import mode\r\n\r\nimp"
},
{
"path": "data_utils.py",
"chars": 24447,
"preview": "import numpy as np\nimport pandas as pd\nimport pdb\nimport re\nfrom time import time\nimport json\nimport random\n\nimport mode"
},
{
"path": "differential_privacy/dp_sgd/dp_optimizer/dp_optimizer.py",
"chars": 10050,
"preview": "# (originally from https://github.com/tensorflow/models/tree/master/research/differential_privacy,\n# possibly with some "
},
{
"path": "differential_privacy/dp_sgd/dp_optimizer/sanitizer.py",
"chars": 4587,
"preview": "# (originally from https://github.com/tensorflow/models/tree/master/research/differential_privacy,\n# possibly with some "
},
{
"path": "differential_privacy/dp_sgd/dp_optimizer/utils.py",
"chars": 10974,
"preview": "# (originally from https://github.com/tensorflow/models/tree/master/research/differential_privacy,\n# possibly with some "
},
{
"path": "differential_privacy/privacy_accountant/tf/accountant.py",
"chars": 17725,
"preview": "# (originally from https://github.com/tensorflow/models/tree/master/research/differential_privacy,\n# possibly with some "
},
{
"path": "eugenium_mmd.py",
"chars": 6961,
"preview": "'''\nCode taken from: https://github.com/eugenium/mmd\n(modified slightly for efficiency/PEP by Stephanie Hyland)\n\nPython "
},
{
"path": "eval.py",
"chars": 36174,
"preview": "#!/usr/bin/env ipython\n# Evaluation of models\n#\n\nimport json\nimport pdb\nimport numpy as np\nimport pandas as pd\nfrom euge"
},
{
"path": "experiments/settings/kdd99.txt",
"chars": 891,
"preview": "{\r\n\"settings_file\": \"\",\r\n\"data\": \"kdd99\",\r\n\"seq_length\": 30,\r\n\"num_signals\": 6,\r\n\"normalise\": false,\r\n\"scale\": 0.1,\r\n\"fr"
},
{
"path": "experiments/settings/kdd99_test.txt",
"chars": 901,
"preview": "{\r\n\"settings_file\": \"\",\r\n\"data\": \"kdd99_test\",\r\n\"seq_length\": 30,\r\n\"num_signals\": 6,\r\n\"normalise\": false,\r\n\"scale\": 0.1,"
},
{
"path": "mmd.py",
"chars": 8101,
"preview": "'''\nMMD functions implemented in tensorflow.\n(from https://github.com/dougalsutherland/opt-mmd/blob/master/gan/mmd.py)\n'"
},
{
"path": "mod_core_rnn_cell_impl.py",
"chars": 41293,
"preview": "# Copyright 2015 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"L"
},
{
"path": "model.py",
"chars": 12352,
"preview": "import tensorflow as tf\nimport numpy as np\n# from data_utils import get_batch\nimport data_utils\nimport pdb\nimport json\ni"
},
{
"path": "plotting.py",
"chars": 23410,
"preview": "import numpy as np\nimport matplotlib as mpl\nmpl.use('Agg')\nimport matplotlib.pyplot as plt\nimport pdb\nfrom time import t"
},
{
"path": "tf_ops.py",
"chars": 681,
"preview": "### from https://github.com/eugenium/MMD/blob/master/tf_ops.py\nimport tensorflow as tf\n\n\ndef sq_sum(t, name=None):\n \""
},
{
"path": "utils.py",
"chars": 6099,
"preview": "#!/usr/bin/env ipython\n# Utility functions that don't fit in other scripts\nimport argparse\nimport json\n\ndef rgan_options"
}
]
// ... and 24 more files (download for full content)
About this extraction
This page contains the full source code of the LiDan456/MAD-GANs GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 45 files (249.0 KB), approximately 68.1k tokens, and a symbol index with 216 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.