Full Code of LiDan456/MAD-GANs for AI

master 3139a73a4112 cached
45 files
249.0 KB
68.1k tokens
216 symbols
1 requests
Download .txt
Showing preview only (260K chars total). Download the full file or copy to clipboard to get everything.
Repository: LiDan456/MAD-GANs
Branch: master
Commit: 3139a73a4112
Files: 45
Total size: 249.0 KB

Directory structure:
gitextract_fejrc7rh/

├── .gitattributes
├── AD.py
├── AD_Invert.py
├── DR_discriminator.py
├── README.md
├── RGAN.py
├── data_utils.py
├── differential_privacy/
│   ├── dp_sgd/
│   │   └── dp_optimizer/
│   │       ├── dp_optimizer.py
│   │       ├── sanitizer.py
│   │       └── utils.py
│   └── privacy_accountant/
│       └── tf/
│           └── accountant.py
├── eugenium_mmd.py
├── eval.py
├── experiments/
│   ├── parameters/
│   │   ├── kdd99_30_0.npy
│   │   ├── kdd99_30_1.npy
│   │   ├── kdd99_30_10.npy
│   │   ├── kdd99_30_11.npy
│   │   ├── kdd99_30_12.npy
│   │   ├── kdd99_30_13.npy
│   │   ├── kdd99_30_14.npy
│   │   ├── kdd99_30_15.npy
│   │   ├── kdd99_30_16.npy
│   │   ├── kdd99_30_17.npy
│   │   ├── kdd99_30_18.npy
│   │   ├── kdd99_30_19.npy
│   │   ├── kdd99_30_2.npy
│   │   ├── kdd99_30_20.npy
│   │   ├── kdd99_30_21.npy
│   │   ├── kdd99_30_22.npy
│   │   ├── kdd99_30_3.npy
│   │   ├── kdd99_30_4.npy
│   │   ├── kdd99_30_5.npy
│   │   ├── kdd99_30_6.npy
│   │   ├── kdd99_30_7.npy
│   │   ├── kdd99_30_8.npy
│   │   └── kdd99_30_9.npy
│   ├── plots/
│   │   └── gs/
│   │       └── kdd99_gs_real.npy
│   └── settings/
│       ├── kdd99.txt
│       └── kdd99_test.txt
├── mmd.py
├── mod_core_rnn_cell_impl.py
├── model.py
├── plotting.py
├── tf_ops.py
└── utils.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitattributes
================================================
# Auto detect text files and perform LF normalization
* text=auto


================================================
FILE: AD.py
================================================
import tensorflow as tf
import numpy as np
import pdb
import json
import model
from mod_core_rnn_cell_impl import LSTMCell  # modified to allow initializing bias in lstm

import utils
import eval
import DR_discriminator
import data_utils

# from pyod.utils.utility import *
from sklearn.utils.validation import *
from sklearn.metrics.classification import *
from sklearn.metrics.ranking import *
from time import time

# Wall-clock start for the total-runtime report printed at the end of the script.
begin = time()

"""
Here, only the discriminator was used to do the anomaly detection
"""

# --- get settings --- #
# parse command line arguments, or use defaults
parser = utils.rgan_options_parser()
settings = vars(parser.parse_args())
# if a settings file is specified, it overrides command line arguments/defaults
if settings['settings_file']: settings = utils.load_settings_from_file(settings)

# --- get data, split --- #
data_path = './experiments/data/' + settings['data_load_from'] + '.data.npy'
print('Loading data from', data_path)
# Force the full (non-single-signal, non-anomaly-eval) data loading mode for
# discriminator-based detection.
settings["eval_single"] = False
settings["eval_an"] = False
samples, labels, index = data_utils.get_data(settings["data"], settings["seq_length"], settings["seq_step"],
                                             settings["num_signals"], settings["sub_id"], settings["eval_single"],
                                             settings["eval_an"], data_path)
# --- save settings, data --- #
# no need
print('Ready to run with settings:')
for (k, v) in settings.items(): print(v, '\t', k)
# add the settings to local environment
# WARNING: at this point a lot of variables appear
# (e.g. `identifier`, used just below, comes from the settings dict)
locals().update(settings)
json.dump(settings, open('./experiments/settings/' + identifier + '.txt', 'w'), indent=0)

class myADclass():
    """Discriminator-only anomaly detection for one trained MAD-GAN epoch.

    Runs the trained discriminator (parameters loaded from
    ./experiments/parameters/<sub_id>_<seq_length>_<epoch>.npy) over every
    test window in `samples` and evaluates several detection strategies at a
    range of thresholds.
    """
    def __init__(self, epoch, settings=settings, samples=samples, labels=labels, index=index):
        # epoch selects which saved parameter snapshot to load
        self.epoch = epoch
        self.settings = settings
        self.samples = samples
        self.labels = labels
        self.index = index

    def ADfunc(self):
        """Score all windows with the discriminator and evaluate detections.

        Returns an 18x4 array: rows 0-5 are logit-based, 6-11 statistic-based,
        12-17 sample-wise results, one row per threshold tao in {0.2..0.7};
        columns are [Accu, Pre, Rec, F1].
        """
        num_samples_t = self.samples.shape[0]
        print('sample_shape:', self.samples.shape[0])
        print('num_samples_t', num_samples_t)

        # Per-timestep discriminator outputs (statistics and logits), plus the
        # ground-truth labels/indices aligned with self.samples.
        D_test = np.empty([num_samples_t, self.settings['seq_length'], 1])
        DL_test = np.empty([num_samples_t, self.settings['seq_length'], 1])
        L_mb = np.empty([num_samples_t, self.settings['seq_length'], 1])
        I_mb = np.empty([num_samples_t, self.settings['seq_length'], 1])

        # The parameter snapshot is the same for every batch of this epoch;
        # build the path once instead of on every iteration.
        para_path = './experiments/parameters/' + self.settings['sub_id'] + '_' + str(
            self.settings['seq_length']) + '_' + str(self.epoch) + '.npy'

        # -- only discriminate one batch for one time -- #
        batch_times = num_samples_t // self.settings['batch_size']
        for batch_idx in range(0, batch_times):
            # display batch progress
            model.display_batch_progression(batch_idx, batch_times)
            start_pos = batch_idx * self.settings['batch_size']
            end_pos = start_pos + self.settings['batch_size']
            T_mb = self.samples[start_pos:end_pos, :, :]
            L_mmb = self.labels[start_pos:end_pos, :, :]
            I_mmb = self.index[start_pos:end_pos, :, :]
            D_t, L_t = DR_discriminator.dis_trained_model(self.settings, T_mb, para_path)
            D_test[start_pos:end_pos, :, :] = D_t
            DL_test[start_pos:end_pos, :, :] = L_t
            L_mb[start_pos:end_pos, :, :] = L_mmb
            I_mb[start_pos:end_pos, :, :] = I_mmb

        # Final partial batch: pad with ones up to batch_size, run the
        # discriminator, then keep only the real rows.
        # BUG FIX: the original read the module-level `samples` here instead of
        # self.samples, silently ignoring a caller-supplied sample array.
        start_pos = batch_times * self.settings['batch_size']
        end_pos = start_pos + self.settings['batch_size']
        size = self.samples[start_pos:end_pos, :, :].shape[0]
        fill = np.ones([self.settings['batch_size'] - size, self.samples.shape[1], self.samples.shape[2]])
        batch = np.concatenate([self.samples[start_pos:end_pos, :, :], fill], axis=0)
        D_t, L_t = DR_discriminator.dis_trained_model(self.settings, batch, para_path)
        L_mmb = self.labels[start_pos:end_pos, :, :]
        I_mmb = self.index[start_pos:end_pos, :, :]
        D_test[start_pos:end_pos, :, :] = D_t[:size, :, :]
        DL_test[start_pos:end_pos, :, :] = L_t[:size, :, :]
        L_mb[start_pos:end_pos, :, :] = L_mmb
        I_mb[start_pos:end_pos, :, :] = I_mmb

        results = np.zeros([18, 4])
        for i in range(2, 8):
            tao = 0.1 * i
            # logits-based combination detection
            Accu2, Pre2, Rec2, F12 = DR_discriminator.detection_Comb(
                DL_test, L_mb, I_mb, self.settings['seq_step'], tao)
            print('seq_length:', self.settings['seq_length'])
            print('Comb-logits-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}'
                  .format(self.epoch, tao, Accu2, Pre2, Rec2, F12))
            results[i - 2, :] = [Accu2, Pre2, Rec2, F12]

            # statistic-based combination detection
            Accu3, Pre3, Rec3, F13 = DR_discriminator.detection_Comb(
                D_test, L_mb, I_mb, self.settings['seq_step'], tao)
            print('seq_length:', self.settings['seq_length'])
            print('Comb-statistic-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}'
                  .format(self.epoch, tao, Accu3, Pre3, Rec3, F13))
            results[i - 2+6, :] = [Accu3, Pre3, Rec3, F13]

            # whole-window (sample-wise) detection
            Accu5, Pre5, Rec5, F15 = DR_discriminator.sample_detection(D_test, L_mb, tao)
            print('seq_length:', self.settings['seq_length'])
            print('sample-wise-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}'
                  .format(self.epoch, tao, Accu5, Pre5, Rec5, F15))
            results[i - 2+12, :] = [Accu5, Pre5, Rec5, F15]

        return results

if __name__ == "__main__":
    print('Main Starting...')

    # Results[epoch] holds the 18x4 metric table returned by ADfunc for that epoch.
    Results = np.empty([settings['num_epochs'], 18, 4])

    # Evaluate the saved discriminator parameters of every training epoch.
    for epoch in range(settings['num_epochs']):
    # for epoch in range(50, 60):
        ob = myADclass(epoch)
        Results[epoch, :, :] = ob.ADfunc()

    # res_path = './experiments/plots/Results' + '_' + settings['sub_id'] + '_' + str(
    #     settings['seq_length']) + '.npy'
    # np.save(res_path, Results)

    print('Main Terminating...')
    end = time() - begin
    # NOTE(review): the message says "Training time" but this measures the
    # testing/detection run — confirm the intended wording.
    print('Testing terminated | Training time=%d s' % (end))

================================================
FILE: AD_Invert.py
================================================
import tensorflow as tf
import numpy as np
import pdb
import json
from mod_core_rnn_cell_impl import LSTMCell  # modified to allow initializing bias in lstm

import data_utils
import plotting
import model
import mmd
import utils
import eval
import DR_discriminator

from differential_privacy.dp_sgd.dp_optimizer import dp_optimizer
from differential_privacy.dp_sgd.dp_optimizer import sanitizer
from differential_privacy.privacy_accountant.tf import accountant

"""
Here, both the discriminator and generator were used to do the anomaly detection
"""

# --- get settings --- #
# parse command line arguments, or use defaults
parser = utils.rgan_options_parser()
settings = vars(parser.parse_args())
# if a settings file is specified, it overrides command line arguments/defaults
if settings['settings_file']: settings = utils.load_settings_from_file(settings)

# --- get data, split --- #
data_path = './experiments/data/' + settings['data_load_from'] + '.data.npy'
print('Loading data from', data_path)
samples, labels, index = data_utils.get_data(settings["data"], settings["seq_length"], settings["seq_step"],
                                             settings["num_signals"], settings["sub_id"], settings["eval_single"],
                                             settings["eval_an"], data_path)
# --- save settings, data --- #
# no need
print('Ready to run with settings:')
for (k, v) in settings.items(): print(v, '\t', k)
# add the settings to local environment
# WARNING: at this point a lot of variables appear
locals().update(settings)
json.dump(settings, open('./experiments/settings/' + identifier + '.txt', 'w'), indent=0)

class myADclass():
    """Anomaly detection using BOTH the trained discriminator and generator.

    For each evaluated epoch a random subset of `t_size` test windows is
    scored with the discriminator (dis_D_model) and reconstructed by inverting
    the generator (invert); detection combines discrimination scores with
    reconstruction residuals.
    """
    def __init__(self, epoch, settings=settings, samples=samples, labels=labels, index=index, t_size=500):
        # epoch selects which saved parameter snapshot to load
        self.epoch = epoch
        self.settings = settings
        self.samples = samples
        self.labels = labels
        self.index = index
        # Number of randomly chosen test windows to evaluate per epoch.
        # Generalized from the previously hard-coded 500 (default unchanged,
        # so existing callers behave identically).
        self.t_size = t_size

    def ADfunc(self):
        """Score self.t_size random windows; return (results, GG, D_test, DL_test)."""
        num_samples_t = self.samples.shape[0]
        t_size = self.t_size
        # random sample of window indices, without replacement
        T_index = np.random.choice(num_samples_t, size=t_size, replace=False)
        print('sample_shape:', self.samples.shape[0])
        print('num_samples_t', num_samples_t)

        # Discriminator outputs, generator reconstructions and aligned
        # ground-truth for the selected windows.
        D_test = np.empty([t_size, self.settings['seq_length'], 1])
        DL_test = np.empty([t_size, self.settings['seq_length'], 1])
        GG = np.empty([t_size, self.settings['seq_length'], self.settings['num_signals']])
        T_samples = np.empty([t_size, self.settings['seq_length'], self.settings['num_signals']])
        L_mb = np.empty([t_size, self.settings['seq_length'], 1])
        I_mb = np.empty([t_size, self.settings['seq_length'], 1])

        # The parameter snapshot is invariant across windows of this epoch.
        para_path = './experiments/parameters/' + self.settings['sub_id'] + '_' + str(
            self.settings['seq_length']) + '_' + str(self.epoch) + '.npy'

        for batch_idx in range(0, t_size):
            # display batch progress
            model.display_batch_progression(batch_idx, t_size)
            T_mb = self.samples[T_index[batch_idx], :, :]
            L_mmb = self.labels[T_index[batch_idx], :, :]
            I_mmb = self.index[T_index[batch_idx], :, :]
            D_t, L_t = DR_discriminator.dis_D_model(self.settings, T_mb, para_path)
            # invert the generator to find the latent code best reconstructing T_mb
            Gs, Zs, error_per_sample, heuristic_sigma = DR_discriminator.invert(self.settings, T_mb, para_path,
                                                                                g_tolerance=None,
                                                                                e_tolerance=0.1, n_iter=None,
                                                                                max_iter=1000,
                                                                                heuristic_sigma=None)
            GG[batch_idx, :, :] = Gs
            T_samples[batch_idx, :, :] = T_mb
            D_test[batch_idx, :, :] = D_t
            DL_test[batch_idx, :, :] = L_t
            L_mb[batch_idx, :, :] = L_mmb
            I_mb[batch_idx, :, :] = I_mmb

        # -- use self-defined evaluation functions -- #
        # -- test different tao values for the detection function -- #
        results = np.empty([5, 5])
        tao = 0.5   # detection threshold
        lam = 0.8   # weight of the discrimination term vs. reconstruction
        Accu1, Pre1, Rec1, F11, FPR1, D_L1 = DR_discriminator.detection_D_I(DL_test, L_mb, I_mb, self.settings['seq_step'], tao)
        print('seq_length:', self.settings['seq_length'])
        print('D:Comb-logits-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}; FPR: {:.4}'
              .format(self.epoch, tao, Accu1, Pre1, Rec1, F11, FPR1))
        results[0, :] = [Accu1, Pre1, Rec1, F11, FPR1]

        Accu2, Pre2, Rec2, F12, FPR2, D_L2 = DR_discriminator.detection_D_I(D_test, L_mb, I_mb, self.settings['seq_step'], tao)
        print('seq_length:', self.settings['seq_length'])
        print('D:Comb-statistic-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}; FPR: {:.4}'
              .format(self.epoch, tao, Accu2, Pre2, Rec2, F12, FPR2))
        results[1, :] = [Accu2, Pre2, Rec2, F12, FPR2]

        Accu3, Pre3, Rec3, F13, FPR3, D_L3 = DR_discriminator.detection_R_D_I(DL_test, GG, T_samples, L_mb, self.settings['seq_step'], tao, lam)
        print('seq_length:', self.settings['seq_length'])
        print('RD:Comb-logits_based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}; FPR: {:.4}'
            .format(self.epoch, tao, Accu3, Pre3, Rec3, F13, FPR3))
        results[2, :] = [Accu3, Pre3, Rec3, F13, FPR3]

        Accu4, Pre4, Rec4, F14, FPR4, D_L4 = DR_discriminator.detection_R_D_I(D_test, GG, T_samples, L_mb, self.settings['seq_step'], tao, lam)
        print('seq_length:', self.settings['seq_length'])
        print('RD:Comb-statistic-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}; FPR: {:.4}'
              .format(self.epoch, tao, Accu4, Pre4, Rec4, F14, FPR4))
        results[3, :] = [Accu4, Pre4, Rec4, F14, FPR4]

        Accu5, Pre5, Rec5, F15, FPR5, D_L5 = DR_discriminator.detection_R_I(GG, T_samples, L_mb, self.settings['seq_step'],tao)
        print('seq_length:', self.settings['seq_length'])
        print('G:Comb-sample-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}; FPR: {:.4}'
              .format(self.epoch, tao, Accu5, Pre5, Rec5, F15, FPR5))
        results[4, :] = [Accu5, Pre5, Rec5, F15, FPR5]

        return results, GG, D_test, DL_test



if __name__ == "__main__":
    print('Main Starting...')

    # per-epoch metric tables (5 detection strategies x 5 metrics)
    Results = np.empty([settings['num_epochs'], 5, 5])

    # Must match the number of windows myADclass.ADfunc evaluates per epoch.
    t_size = 500
    D_test = np.empty([settings['num_epochs'], t_size, settings['seq_length'], 1])
    DL_test = np.empty([settings['num_epochs'], t_size, settings['seq_length'], 1])
    GG = np.empty([settings['num_epochs'], t_size, settings['seq_length'], settings['num_signals']])

    for epoch in range(settings['num_epochs']):
    # for epoch in range(1):
        ob = myADclass(epoch)
        Results[epoch, :, :], GG[epoch, :, :, :], D_test[epoch, :, :, :], DL_test[epoch, :, :, :] = ob.ADfunc()

    res_path = './experiments/plots/Results_Invert' + '_' + settings['sub_id'] + '_' + str(
        settings['seq_length']) + '.npy'
    np.save(res_path, Results)

    dg_path = './experiments/plots/DG_Invert' + '_' + settings['sub_id'] + '_' + str(
        settings['seq_length']) + '_'
    np.save(dg_path + 'D_test.npy', D_test)
    np.save(dg_path + 'DL_test.npy', DL_test)
    # BUG FIX: the original saved DL_test under GG.npy; save the generator
    # reconstructions themselves.
    np.save(dg_path + 'GG.npy', GG)

    print('Main Terminating...')

================================================
FILE: DR_discriminator.py
================================================
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from matplotlib.colors import hsv_to_rgb
import model
import mmd
from mod_core_rnn_cell_impl import LSTMCell
from sklearn.metrics import precision_recall_fscore_support

def anomaly_detection_plot(D_test, T_mb, L_mb, D_L, epoch, identifier):
    """Plot a 4x4 grid of test windows and save it under ./experiments/plots/DR_dis/.

    Each panel overlays the discriminator output ('Pro', dashed green), the
    raw data ('Data', blue), the ground-truth labels (black) and the predicted
    labels (red) for one window.  Returns True after saving the figure.
    """
    n_windows = D_test.shape[0]
    seq_len = D_test.shape[1]
    D_L = D_L.reshape([n_windows, seq_len, -1])

    x_points = np.arange(seq_len)

    fig, ax = plt.subplots(4, 4, sharex=True)
    for row in range(4):
        for col in range(4):
            # the grid is filled column-major: panel (row, col) shows window col*4+row
            k = col * 4 + row
            ax[row, col].plot(x_points, D_test[k, :, :], '--g', label='Pro')
            ax[row, col].plot(x_points, T_mb[k, :, :], 'b', label='Data')
            ax[row, col].plot(x_points, L_mb[k, :, :], 'k', label='Label')
            ax[row, col].plot(x_points, D_L[k, :, :], 'r', label='Label')
            ax[row, col].set_ylim(-1, 1)
    for col in range(4):
        ax[-1, col].xaxis.set_ticks(range(0, seq_len, int(seq_len / 6)))
    fig.suptitle(epoch)
    fig.subplots_adjust(hspace=0.15)
    fig.savefig("./experiments/plots/DR_dis/" + identifier + "_epoch" + str(epoch).zfill(4) + ".png")
    plt.clf()
    plt.close()

    return True

def detection_Comb(Label_test, L_mb, I_mb, seq_step, tao):
    """Point-wise detection from overlapping-window discriminator scores.

    Windows of length bb starting every seq_step steps are averaged back onto
    the original time axis (length LL), thresholded at tao, and compared with
    the equally de-windowed ground-truth labels.

    Returns (Accu, precision, recall, f1); Accu is a percentage, the rest come
    from sklearn's binary-average precision_recall_fscore_support.
    """
    aa = Label_test.shape[0]  # number of windows
    bb = Label_test.shape[1]  # window (sequence) length

    # length of the reconstructed time series covered by the windows
    LL = (aa-1)*seq_step+bb

    Label_test = abs(Label_test.reshape([aa, bb]))
    L_mb = L_mb.reshape([aa, bb])
    I_mb = I_mb.reshape([aa, bb])  # NOTE(review): index info is currently unused here
    D_L = np.zeros([LL, 1])
    L_L = np.zeros([LL, 1])
    Count = np.zeros([LL, 1])
    for i in range(0, aa):
        for j in range(0, bb):
            # accumulate every window's contribution onto its original position
            D_L[i*seq_step+j] += Label_test[i, j]
            L_L[i * seq_step + j] += L_mb[i, j]
            Count[i * seq_step + j] += 1

    # average the overlapping contributions per original time step
    D_L /= Count
    L_L /= Count

    # High discriminator output means "looks real/normal": scores above tao
    # become 0 (normal), at-or-below become 1 (anomalous).
    for i in range(LL):
        if D_L[i] > tao:
            D_L[i] = 0
        else:
            D_L[i] = 1

    cc = (D_L == L_L)
    cc = list(cc.reshape([-1]))
    N = cc.count(True)

    print('N:', N)

    Accu = float((N / LL) * 100)

    precision, recall, f1, _ = precision_recall_fscore_support(L_L, D_L, average='binary')

    # (removed an unused TP/TN/FP/FN initialisation and a stray trailing comma
    # in the return statement)
    return Accu, precision, recall, f1


def detection_logits_I(DL_test, L_mb, I_mb, seq_step, tao):
    """Point-wise detection from discriminator logits on overlapping windows.

    Averages the absolute per-timestep logits of overlapping windows back onto
    the original time axis, thresholds at tao, and scores the point-wise
    predictions against the de-windowed labels.

    Returns (Accu, precision, recall, f1, FPR, D_L) where D_L holds the final
    0/1 prediction per original time step.
    """
    aa = DL_test.shape[0]  # number of windows
    bb = DL_test.shape[1]  # window length

    # length of the reconstructed series: windows start every seq_step steps
    LL = (aa-1)*seq_step+bb

    DL_test = abs(DL_test.reshape([aa, bb]))
    L_mb = L_mb .reshape([aa, bb])
    I_mb = I_mb .reshape([aa, bb])  # NOTE(review): index info is unused below
    D_L = np.zeros([LL, 1])
    L_L = np.zeros([LL, 1])
    Count = np.zeros([LL, 1])
    for i in range(0, aa):
        for j in range(0, bb):
            # accumulate every window's contribution onto its original position
            D_L[i*seq_step+j] += DL_test[i, j]
            L_L[i * seq_step + j] += L_mb[i, j]
            Count[i * seq_step + j] += 1

    # average the overlapping contributions per time step
    D_L /= Count
    L_L /= Count

    TP, TN, FP, FN = 0, 0, 0, 0

    for i in range(LL):
        # high score -> looks normal (0); low score -> anomalous (1)
        if D_L[i] > tao:
            # true/negative
            D_L[i] = 0
        else:
            # false/positive
            D_L[i] = 1

        # NOTE(review): L_L is an average of 0/1 labels, so B == 1 holds only
        # when every overlapping window marks the step anomalous; fractional
        # averages fall through all four branches — confirm this is intended.
        A = D_L[i]
        B = L_L[i]
        if A == 1 and B == 1:
            TP += 1
        elif A == 1 and B == 0:
            FP += 1
        elif A == 0 and B == 0:
            TN += 1
        elif A == 0 and B == 1:
            FN += 1


    cc = (D_L == L_L)
    # print('D_L:', D_L)
    # print('L_L:', L_L)
    cc = list(cc.reshape([-1]))
    N = cc.count(True)

    print('N:', N)

    Accu = float((N / LL) * 100)

    precision, recall, f1, _ = precision_recall_fscore_support(L_L, D_L, average='binary')

    # true positive among all the detected positive
    # Pre = (100 * TP) / (TP + FP + 1)
    # # true positive among all the real positive
    # Rec = (100 * TP) / (TP + FN + 1)
    # # The F1 score is the harmonic average of the precision and recall,
    # # where an F1 score reaches its best value at 1 (perfect precision and recall) and worst at 0.
    # F1 = (2 * Pre * Rec) / (100 * (Pre + Rec + 1))
    # False positive rate--false alarm rate
    # (the +1 in the denominator avoids division by zero)
    FPR = (100 * FP) / (FP + TN+1)

    return Accu, precision, recall, f1, FPR, D_L

def detection_statistic_I(D_test, L_mb, I_mb, seq_step, tao):
    """Point-wise detection for one dimension from discriminator statistics.

    De-windows the per-timestep discriminator scores of overlapping windows
    (one window every seq_step steps), averages them per original time step,
    thresholds at tao and scores against the de-windowed labels.

    Returns (Accu, precision, recall, f1, FPR, D_L).
    """
    aa = D_test.shape[0]  # number of windows
    bb = D_test.shape[1]  # window length

    # length of the reconstructed time series
    LL = (aa-1) * seq_step + bb

    D_test = D_test.reshape([aa, bb])
    L_mb = L_mb.reshape([aa, bb])
    I_mb = I_mb.reshape([aa, bb])  # NOTE(review): index info is unused below
    D_L = np.zeros([LL, 1])
    L_L = np.zeros([LL, 1])
    Count = np.zeros([LL, 1])
    for i in range(0, aa):
        for j in range(0, bb):
            # accumulate every window's contribution onto its original position
            D_L[i * seq_step + j] += D_test[i, j]
            L_L[i * seq_step + j] += L_mb[i, j]
            Count[i * seq_step + j] += 1

    # average the overlapping contributions per time step
    D_L /= Count
    L_L /= Count

    TP, TN, FP, FN = 0, 0, 0, 0

    for i in range(LL):
        # high score -> looks normal (0); low score -> anomalous (1)
        if D_L[i] > tao:
            D_L[i] = 0
        else:
            D_L[i] = 1

        A = D_L[i]
        B = L_L[i]
        if A == 1 and B == 1:
            TP += 1
        elif A == 1 and B == 0:
            FP += 1
        elif A == 0 and B == 0:
            TN += 1
        elif A == 0 and B == 1:
            FN += 1

    cc = (D_L == L_L)
    cc = list(cc.reshape([-1]))
    N = cc.count(True)
    Accu = float((N / LL) * 100)

    precision, recall, f1, _ = precision_recall_fscore_support(L_L, D_L, average='binary')

    # False positive rate (false-alarm rate).  BUG FIX: guard the degenerate
    # case FP + TN == 0, which previously raised ZeroDivisionError (the
    # sibling functions smooth their denominators with +1).
    FPR = (100 * FP) / (FP + TN) if (FP + TN) > 0 else 0.0

    return Accu, precision, recall, f1, FPR, D_L

def detection_D_I(DD, L_mb, I_mb, seq_step, tao):
    """Point-wise detection for one dimension from discriminator outputs DD.

    Overlapping windows (one every seq_step steps) are averaged back onto the
    original time axis, thresholded at tao and compared with the de-windowed
    labels.

    Returns (Accu, Pre, Rec, F1, FPR, D_L); the rates are percentages except
    F1, which lies in [0, 1].
    """
    aa = DD.shape[0]  # number of windows
    bb = DD.shape[1]  # window length

    # length of the reconstructed time series
    LL = (aa-1)*seq_step+bb

    DD = abs(DD.reshape([aa, bb]))
    L_mb = L_mb .reshape([aa, bb])
    I_mb = I_mb .reshape([aa, bb])  # NOTE(review): index info is unused below
    D_L = np.zeros([LL, 1])
    L_L = np.zeros([LL, 1])
    Count = np.zeros([LL, 1])
    for i in range(0, aa):
        for j in range(0, bb):
            # BUG FIX: these offsets were hard-coded as i*10, which is only
            # correct when seq_step == 10 (and indexes out of bounds
            # otherwise); use seq_step like the sibling detection functions.
            D_L[i * seq_step + j] += DD[i, j]
            L_L[i * seq_step + j] += L_mb[i, j]
            Count[i * seq_step + j] += 1

    # average the overlapping contributions per time step
    D_L /= Count
    L_L /= Count

    TP, TN, FP, FN = 0, 0, 0, 0

    for i in range(LL):
        # high score -> looks normal (0); low score -> anomalous (1)
        if D_L[i] > tao:
            D_L[i] = 0
        else:
            D_L[i] = 1

        A = D_L[i]
        B = L_L[i]
        if A == 1 and B == 1:
            TP += 1
        elif A == 1 and B == 0:
            FP += 1
        elif A == 0 and B == 0:
            TN += 1
        elif A == 0 and B == 1:
            FN += 1

    cc = (D_L == L_L)
    cc = list(cc.reshape([-1]))
    N = cc.count(True)

    print('N:', N)

    Accu = float((N / LL) * 100)

    # the +1 smoothing in the denominators avoids division by zero; kept for
    # output compatibility with the rest of the project
    # true positive among all the detected positive
    Pre = (100 * TP) / (TP + FP + 1)
    # true positive among all the real positive
    Rec = (100 * TP) / (TP + FN + 1)
    # harmonic mean of precision and recall, rescaled into [0, 1]
    F1 = (2 * Pre * Rec) / (100 * (Pre + Rec + 1))
    # False positive rate--false alarm rate
    FPR = (100 * FP) / (FP + TN+1)

    return Accu, Pre, Rec, F1, FPR, D_L

def detection_R_D_I(DD, Gs, T_mb, L_mb, seq_step, tao, lam):
    """Combined reconstruction + discrimination point-wise detection.

    Each original time step is scored with (1-lam)*R + lam*(1-|D|), where R
    is the mean absolute reconstruction residual |Gs - T_mb| (averaged over
    signals) and D the discriminator output, both de-windowed from
    overlapping windows; steps whose combined score exceeds tao are flagged
    anomalous.  DD, Gs, T_mb and L_mb all cover the same windows.

    Returns (Accu, Pre, Rec, F1, FPR, L_pre).
    """
    # per-timestep reconstruction residual, averaged over signal channels
    R = np.absolute(Gs - T_mb)
    R = np.mean(R, axis=2)
    aa = DD.shape[0]  # number of windows
    bb = DD.shape[1]  # window length

    # length of the reconstructed time series
    LL = (aa - 1) * seq_step + bb

    DD = abs(DD.reshape([aa, bb]))
    DD = 1-DD  # low discriminator score ("fake") -> high anomaly score
    L_mb = L_mb.reshape([aa, bb])
    R = R.reshape([aa, bb])

    D_L = np.zeros([LL, 1])
    R_L = np.zeros([LL, 1])
    L_L = np.zeros([LL, 1])
    L_pre = np.zeros([LL, 1])
    Count = np.zeros([LL, 1])
    for i in range(0, aa):
        for j in range(0, bb):
            # BUG FIX: offsets were hard-coded as i*10; use seq_step so the
            # mapping matches LL above for any stride.
            D_L[i * seq_step + j] += DD[i, j]
            L_L[i * seq_step + j] += L_mb[i, j]
            R_L[i * seq_step + j] += R[i, j]
            Count[i * seq_step + j] += 1
    D_L /= Count
    L_L /= Count
    R_L /= Count

    TP, TN, FP, FN = 0, 0, 0, 0

    for i in range(LL):
        # combined anomaly score above tao -> anomalous
        if (1-lam)*R_L[i] + lam*D_L[i] > tao:
            L_pre[i] = 1
        else:
            L_pre[i] = 0

        A = L_pre[i]
        B = L_L[i]
        if A == 1 and B == 1:
            TP += 1
        elif A == 1 and B == 0:
            FP += 1
        elif A == 0 and B == 0:
            TN += 1
        elif A == 0 and B == 1:
            FN += 1

    cc = (L_pre == L_L)
    cc = list(cc.reshape([-1]))
    N = cc.count(True)
    # BUG FIX: accuracy was divided by aa*bb although cc holds exactly LL
    # de-windowed entries; normalise by LL like detection_D_I does.
    Accu = float((N / LL) * 100)

    # +1 smoothing in the denominators avoids division by zero
    # true positive among all the detected positive
    Pre = (100 * TP) / (TP + FP + 1)
    # true positive among all the real positive
    Rec = (100 * TP) / (TP + FN + 1)
    # harmonic mean of precision and recall, rescaled into [0, 1]
    F1 = (2 * Pre * Rec) / (100 * (Pre + Rec + 1))
    # False positive rate
    FPR = (100 * FP) / (FP + TN+1)

    return Accu, Pre, Rec, F1, FPR, L_pre

def detection_R_I(Gs, T_mb, L_mb, seq_step, tao):
    """Reconstruction-only point-wise detection.

    De-windows the mean absolute reconstruction residual |Gs - T_mb|
    (averaged over signals) from overlapping windows onto the original time
    axis; steps whose residual exceeds tao are flagged anomalous.

    Returns (Accu, Pre, Rec, F1, FPR, L_pre).
    """
    # per-timestep reconstruction residual, averaged over signal channels
    R = np.absolute(Gs - T_mb)
    R = np.mean(R, axis=2)
    aa = R.shape[0]  # number of windows
    bb = R.shape[1]  # window length

    # length of the reconstructed time series
    LL = (aa - 1) * seq_step + bb

    L_mb = L_mb.reshape([aa, bb])
    R = R.reshape([aa, bb])

    L_L = np.zeros([LL, 1])
    R_L = np.zeros([LL, 1])
    L_pre = np.zeros([LL, 1])
    Count = np.zeros([LL, 1])
    for i in range(0, aa):
        for j in range(0, bb):
            # BUG FIX: offsets were hard-coded as i*10; use seq_step so the
            # mapping matches LL above for any stride.
            L_L[i * seq_step + j] += L_mb[i, j]
            R_L[i * seq_step + j] += R[i, j]
            Count[i * seq_step + j] += 1
    L_L /= Count
    R_L /= Count

    TP, TN, FP, FN = 0, 0, 0, 0

    for i in range(LL):
        # large residual -> anomalous
        if R_L[i] > tao:
            L_pre[i] = 1
        else:
            L_pre[i] = 0

        A = L_pre[i]
        B = L_L[i]
        if A == 1 and B == 1:
            TP += 1
        elif A == 1 and B == 0:
            FP += 1
        elif A == 0 and B == 0:
            TN += 1
        elif A == 0 and B == 1:
            FN += 1

    cc = (L_pre == L_L)
    cc = list(cc.reshape([-1]))
    N = cc.count(True)
    # BUG FIX: accuracy was divided by aa*bb although cc holds exactly LL
    # de-windowed entries; normalise by LL like detection_D_I does.
    Accu = float((N / LL) * 100)

    # +1 smoothing in the denominators avoids division by zero
    # true positive among all the detected positive
    Pre = (100 * TP) / (TP + FP + 1)
    # true positive among all the real positive
    Rec = (100 * TP) / (TP + FN + 1)
    # harmonic mean of precision and recall, rescaled into [0, 1]
    F1 = (2 * Pre * Rec) / (100 * (Pre + Rec + 1))
    # False positive rate
    FPR = (100 * FP) / (FP + TN+1)

    return Accu, Pre, Rec, F1, FPR, L_pre


def sample_detection(D_test, L_mb, tao):
    """Window-level (sample-wise) detection for one dimension.

    A whole window is flagged anomalous when its mean discriminator score is
    at or below tao; a window's ground truth is anomalous when any of its
    time steps is labelled anomalous.

    Returns (Accu, precision, recall, f1).
    """
    n_win = D_test.shape[0]
    win_len = D_test.shape[1]

    scores = D_test.reshape([n_win, win_len])
    step_labels = L_mb.reshape([n_win, win_len])

    # collapse per-step labels to one label per window: any positive step
    # makes the window positive
    win_truth = np.sum(step_labels, 1)
    win_truth[win_truth > 0] = 1

    preds = np.empty([n_win, ])
    for k in range(n_win):
        # a high mean score means the discriminator sees the window as
        # real/normal (0); a low mean score flags it anomalous (1)
        preds[k] = 0 if np.mean(scores[k, :]) > tao else 1

    matched = list(preds == win_truth).count(True)
    Accu = float((matched / (n_win)) * 100)

    precision, recall, f1, _ = precision_recall_fscore_support(win_truth, preds, average='binary')

    return Accu, precision, recall, f1


def CUSUM_det(spe_n, spe_a, labels):
    """Two-sided CUSUM change detection on squared-prediction-error series.

    spe_n: SPE values of normal data, used to estimate the noise spread.
    spe_a: SPE values of the data under test.
    labels: 0/1 ground-truth labels aligned with spe_a.

    Returns (Accu, Pre, Rec, F1, FPR); percentages except F1 in [0, 1].
    """
    mu = np.mean(spe_n)  # NOTE(review): estimated but unused; kept for reference
    sigma = np.std(spe_n)

    # slack and decision threshold, both proportional to the normal spread
    kk = 3*sigma
    H = 15*sigma
    print('H:', H)

    tar = np.mean(spe_a)

    mm = spe_a.shape[0]

    SH = np.empty([mm, ])
    SL = np.empty([mm, ])

    # Initialise the recursion through the last slot (index -1), which the
    # first iteration reads as SH[i-1]/SL[i-1].  The original reset these on
    # every loop iteration; doing it once before the loop is equivalent.
    SH[-1] = 0
    SL[-1] = 0
    for i in range(mm):
        # upper / lower cumulative sums with slack kk around the target mean
        SH[i] = max(0, SH[i-1]+spe_a[i]-(tar+kk))
        SL[i] = min(0, SL[i-1]+spe_a[i]-(tar-kk))

    count = np.empty([mm, ])
    TP = 0
    TN = 0
    FP = 0
    FN = 0
    for i in range(mm):
        # alarm when either cumulative sum crosses its decision threshold
        if SH[i] <= H and SL[i] >= -H:
            count[i] = 0
        else:
            count[i] = 1

        C = count[i]
        D = labels[i]
        if C == 1 and D == 1:
            TP += 1
        elif C == 1 and D == 0:
            FP += 1
        elif C == 0 and D == 0:
            TN += 1
        elif C == 0 and D == 1:
            FN += 1

    cc = (count == labels)
    N = list(cc).count(True)
    Accu = float((N / (mm)) * 100)

    # true positive among all the detected positive (+1 smoothing as elsewhere)
    Pre = (100 * TP) / (TP + FP + 1)
    # true positive among all the real positive.  BUG FIX: guard the case of
    # no real positives, which previously raised ZeroDivisionError.
    Rec = (100 * TP) / (TP + FN) if (TP + FN) > 0 else 0.0
    # harmonic mean of precision and recall, rescaled into [0, 1]
    F1 = (2 * Pre * Rec) / (100 * (Pre + Rec + 1))
    # False positive rate; guarded like Rec.
    FPR = (100 * FP) / (FP + TN) if (FP + TN) > 0 else 0.0

    return Accu, Pre, Rec, F1, FPR


def SPE(X, pc):
    """Square Prediction Error of each row of X w.r.t. principal components.

    Computes spe[i] = x_i' (I - P'P) x_i — the squared residual distance of
    sample x_i from the subspace spanned by the rows of pc.
    """
    n_samples = X.shape[0]
    n_feats = X.shape[1]

    # residual projector onto the complement of the component subspace
    residual = np.identity(n_feats, float) - np.matmul(pc.transpose(1, 0), pc)

    spe = np.empty([n_samples])
    for row in range(n_samples):
        sample = X[row, :].reshape([n_feats, 1])
        left = np.matmul(sample.transpose(1, 0), residual)
        spe[row] = np.matmul(left, sample)

    return spe



def generator_o(z, hidden_units_g, seq_length, batch_size, num_generated_features, reuse=False, parameters=None, learn_scale=True):
    """
    Rebuild the trained generator graph from a saved parameter dict.

    z:          latent input tensor, [batch_size, seq_length, latent_dim].
    parameters: dict mapping the original variable names to saved values;
                every variable below is initialised from it.
    Returns the generated sequences,
    [batch_size, seq_length, num_generated_features], squashed to [-1, 1].
    """
    # A dedicated variable scope keeps this LSTM's variables separate from
    # any other cells in the graph.
    with tf.variable_scope("generator_o") as scope:

        # Constant initializers restore the trained weights.
        W_out_G_initializer = tf.constant_initializer(value=parameters['generator/W_out_G:0'])
        b_out_G_initializer = tf.constant_initializer(value=parameters['generator/b_out_G:0'])
        try:
            scale_out_G_initializer = tf.constant_initializer(value=parameters['generator/scale_out_G:0'])
        except KeyError:
            # Checkpoint has no scale variable: fall back to 1, which is only
            # valid when the scale was learnable in that configuration.
            scale_out_G_initializer = tf.constant_initializer(value=1)
            assert learn_scale
        lstm_initializer = tf.constant_initializer(value=parameters['generator/rnn/lstm_cell/weights:0'])
        bias_start = parameters['generator/rnn/lstm_cell/biases:0']

        W_out_G = tf.get_variable(name='W_out_G', shape=[hidden_units_g, num_generated_features], initializer=W_out_G_initializer)
        b_out_G = tf.get_variable(name='b_out_G', shape=num_generated_features, initializer=b_out_G_initializer)
        # Created so the restored graph matches the original variable set;
        # frozen (trainable=False) during inversion.
        scale_out_G = tf.get_variable(name='scale_out_G', shape=1, initializer=scale_out_G_initializer, trainable=False)

        cell = LSTMCell(num_units=hidden_units_g,
                        state_is_tuple=True,
                        initializer=lstm_initializer,
                        bias_start=bias_start,
                        reuse=reuse)
        # Every sequence in the batch runs for the full seq_length.
        rnn_outputs, rnn_states = tf.nn.dynamic_rnn(
            cell=cell,
            dtype=tf.float32,
            sequence_length=[seq_length] * batch_size,
            inputs=z)
        # Project each hidden state to the output dimension, then squash.
        flat_hidden = tf.reshape(rnn_outputs, [-1, hidden_units_g])
        flat_logits = tf.matmul(flat_hidden, W_out_G) + b_out_G  # weighted sum
        flat_output = tf.nn.tanh(flat_logits)  # activation into [-1, 1]
        output_3d = tf.reshape(flat_output, [-1, seq_length, num_generated_features])
    return output_3d


def discriminator_o(x, hidden_units_d, reuse=False, parameters=None):
    """
    Rebuild the trained discriminator graph from a saved parameter dict.

    x: input sequences, [batch, seq_length, num_features].
    Returns (output, logits): the per-timestep sigmoid scores in [0, 1]
    and the raw logits they were computed from.
    """
    # NOTE(review): scope name "discriminator_0" (digit zero) looks like a
    # typo for "discriminator_o", but it only namespaces the variables, so
    # it is preserved byte-for-byte.
    with tf.variable_scope("discriminator_0") as scope:

        # Constant initializers restore the trained output-layer weights.
        W_out_D_initializer = tf.constant_initializer(value=parameters['discriminator/W_out_D:0'])
        b_out_D_initializer = tf.constant_initializer(value=parameters['discriminator/b_out_D:0'])

        W_out_D = tf.get_variable(name='W_out_D', shape=[hidden_units_d, 1],  initializer=W_out_D_initializer)
        b_out_D = tf.get_variable(name='b_out_D', shape=1, initializer=b_out_D_initializer)

        cell = tf.contrib.rnn.LSTMCell(num_units=hidden_units_d, state_is_tuple=True, reuse=reuse)
        rnn_outputs, rnn_states = tf.nn.dynamic_rnn(cell=cell, dtype=tf.float32, inputs=x)

        # Per-timestep weighted sum of hidden states ('ijk,km' -> 'ijm').
        logits = tf.einsum('ijk,km', rnn_outputs, W_out_D) + b_out_D

        # Sigmoid maps logits into [0, 1] (a probability-like score).
        output = tf.nn.sigmoid(logits)

    return output, logits


def invert(settings, samples, para_path, g_tolerance=None, e_tolerance=0.1,
           n_iter=None, max_iter=10000, heuristic_sigma=None):
    """
    Return the latent space points corresponding to a set of a samples
    ( from gradient descent )
    Note: this function is designed for ONE sample generation

    settings:        settings dict, or string identifier of a settings file
                     under ./experiments/settings/.
    samples:         the real sample to invert.
    para_path:       path of the saved generator parameters.
    g_tolerance:     if given, iterate until the gradient norm drops below it.
    e_tolerance:     otherwise, iterate until |reconstruction error| < it.
    n_iter:          if given, run exactly this many steps instead.
    max_iter:        hard cap for the tolerance-driven modes.
    heuristic_sigma: RBF bandwidth; estimated from samples when None.

    Returns (Gs, Zs, error_per_sample, heuristic_sigma).
    """
    # cast samples to float32
    samples = np.float32(samples)

    # if settings is a string, assume it's an identifier and load
    if type(settings) == str:
        settings = json.load(open('./experiments/settings/' + settings + '.txt', 'r'))

    # get the trained generator parameters
    parameters = model.load_parameters(para_path)
    # Z is a trainable VARIABLE: gradient descent moves it through latent
    # space while the generator weights stay fixed.
    Z = tf.get_variable(name='Z', shape=[1, settings['seq_length'],
                                         settings['latent_dim']],
                        initializer=tf.random_normal_initializer())
    # generator output for the current latent point
    G_samples = generator_o(Z, settings['hidden_units_g'], settings['seq_length'],
                          1, settings['num_generated_features'],
                          reuse=False, parameters=parameters)

    fd = None

    # define mmd-based loss between the generated and the real sample
    if heuristic_sigma is None:
        heuristic_sigma = mmd.median_pairwise_distance_o(samples)  # this is noisy
        print('heuristic_sigma:', heuristic_sigma)
    samples = tf.reshape(samples, [1, settings['seq_length'], settings['num_generated_features']])
    Kxx, Kxy, Kyy, wts = mmd._mix_rbf_kernel(G_samples, samples, sigmas=tf.constant(value=heuristic_sigma, shape=(1, 1)))
    # cross-kernel diagonal = similarity between sample i and generation i
    similarity_per_sample = tf.diag_part(Kxy)
    reconstruction_error_per_sample = 1 - similarity_per_sample
    similarity = tf.reduce_mean(similarity_per_sample)
    reconstruction_error = 1 - similarity
    # optimiser updates ONLY Z (var_list), not the generator weights
    solver = tf.train.RMSPropOptimizer(learning_rate=0.1).minimize(reconstruction_error_per_sample, var_list=[Z])

    grad_Z = tf.gradients(reconstruction_error_per_sample, Z)[0]
    grad_per_Z = tf.norm(grad_Z, axis=(1, 2))
    grad_norm = tf.reduce_mean(grad_per_Z)
    print('Finding latent state corresponding to samples...')

    # BUG FIX: the original opened an extra tf.Session() here (and ran the
    # initializer in it) that was never used or closed -- a session leak.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        error = sess.run(reconstruction_error, feed_dict=fd)
        g_n = sess.run(grad_norm, feed_dict=fd)
        i = 0
        if not n_iter is None:
            # fixed number of optimisation steps
            while i < n_iter:
                _ = sess.run(solver, feed_dict=fd)
                error = sess.run(reconstruction_error, feed_dict=fd)
                i += 1
        else:
            if not g_tolerance is None:
                # iterate until the gradient norm is small enough
                while g_n > g_tolerance:
                    _ = sess.run(solver, feed_dict=fd)
                    error, g_n = sess.run([reconstruction_error, grad_norm], feed_dict=fd)
                    i += 1
                    print(error, g_n)
                    if i > max_iter:
                        break
            else:
                # iterate until the reconstruction error is small enough
                while np.abs(error) > e_tolerance:
                    _ = sess.run(solver, feed_dict=fd)
                    error = sess.run(reconstruction_error, feed_dict=fd)
                    i += 1
                    if i > max_iter:
                        break
        Zs = sess.run(Z, feed_dict=fd)
        Gs = sess.run(G_samples, feed_dict={Z: Zs})
        error_per_sample = sess.run(reconstruction_error_per_sample, feed_dict=fd)
        print('Z found in', i, 'iterations with final reconstruction error of', error)
    # clear the graph so repeated inversions don't accumulate variables
    tf.reset_default_graph()

    return Gs, Zs, error_per_sample, heuristic_sigma


def dis_trained_model(settings, samples, para_path):
    """
    Return the discrimination results of  num_samples testing samples from a trained model described by settings dict

    settings:  settings dict, or string identifier of a settings file under
               ./experiments/settings/.
    samples:   array [num_samples, seq_length, num_variables] of test windows.
    para_path: path of the saved discriminator parameters to restore.
    Returns (D_vals, L_vals): sigmoid outputs and raw logits for the batch.
    """
    # resolve a settings identifier into the actual settings dict
    if type(settings) == str:
        settings = json.load(open('./experiments/settings/' + settings + '.txt', 'r'))

    samples = np.float32(samples)
    num_samples = samples.shape[0]
    num_variables = samples.shape[2]

    # restore the trained weights into fresh graph variables
    parameters = model.load_parameters(para_path)

    # placeholder for the whole batch of test windows
    T = tf.placeholder(tf.float32, [num_samples, settings['seq_length'], num_variables])

    # build the discriminator graph (normal GAN)
    D_t, L_t = discriminator_o(T, settings['hidden_units_d'], reuse=False, parameters=parameters)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        D_vals, L_vals = sess.run([D_t, L_t], feed_dict={T: samples})

    # clear the graph so repeated calls do not accumulate variables
    tf.reset_default_graph()
    return D_vals, L_vals

def dis_D_model(settings, samples, para_path):
    """
    Return the discrimination results of ONE testing sample from a trained
    model described by settings dict.

    settings:  settings dict, or string identifier of a settings file under
               ./experiments/settings/.
    samples:   one window, reshaped to [1, seq_length, num_generated_features].
    para_path: path of the saved discriminator parameters to restore.
    Returns (D_t, L_t): sigmoid output and raw logits for the sample.
    """
    # if settings is a string, assume it's an identifier and load
    if type(settings) == str:
        settings = json.load(open('./experiments/settings/' + settings + '.txt', 'r'))

    samples = np.float32(samples)
    samples = np.reshape(samples, [1, settings['seq_length'], settings['num_generated_features']])

    # restore the trained weights into fresh graph variables
    parameters = model.load_parameters(para_path)

    # placeholder for the single test window
    T = tf.placeholder(tf.float32, [1, settings['seq_length'], settings['num_generated_features']])

    # create the discriminator (normal GAN)
    D_t, L_t = discriminator_o(T, settings['hidden_units_d'], reuse=False, parameters=parameters)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # BUG FIX: config was built but never passed to the session, so
    # allow_growth had no effect (cf. dis_trained_model above).
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        D_t, L_t = sess.run([D_t, L_t], feed_dict={T: samples})

    tf.reset_default_graph()
    return D_t, L_t

================================================
FILE: README.md
================================================
# -- Multivariate Anomaly Detection for Time Series Data with GANs -- #

# MAD-GAN

This repository contains code for the paper, _[MAD-GAN: Multivariate Anomaly Detection for Time Series Data with Generative Adversarial Networks](https://arxiv.org/pdf/1901.04997.pdf)_, by Dan Li, Dacheng Chen, Jonathan Goh, and See-Kiong Ng.

MAD-GAN is a refined version of GAN-AD at _[Anomaly Detection with Generative Adversarial Networks for Multivariate Time Series](https://arxiv.org/pdf/1809.04758.pdf)_ The code can be found at https://github.com/LiDan456/GAN-AD

(We are still working on this topic, will upload the completed version later...)

## Overview

We used generative adversarial networks (GANs) to do anomaly detection for time series data.
The GAN framework was **R**GAN, which was taken from the paper, _[Real-valued (Medical) Time Series Generation with Recurrent Conditional GANs](https://arxiv.org/abs/1706.02633)_.
Please refer to https://github.com/ratschlab/RGAN for the original code.

## Quickstart

- Python3

- Please unpack the data.7z file in the data folder before running RGAN.py and AD.py

- To train the model:
  
  """ python RGAN.py --settings_file kdd99 """

- To do anomaly detection:

  """ python AD.py --settings_file kdd99_test"""
  
  """ python AD_Invert.py --settings_file kdd99_test"""

## Data

We applied our method to the SWaT and WADI datasets in the paper; however, we didn't upload the data in this repository. Please refer to https://itrust.sutd.edu.sg/ and send a request to iTrust if you want to try the data.

In this repository we used the KDD Cup 1999 dataset as an example (please unpack the data.7z file in the data folder before running RGAN.py and AD.py). You can also download the original data at http://kdd.ics.uci.edu/databases/kddcup99/kddcup99.html



================================================
FILE: RGAN.py
================================================
import numpy as np
import tensorflow as tf
import pdb
import random
import json
from scipy.stats import mode

import data_utils
import plotting
import model
import utils
import eval
import DR_discriminator

from time import time
from math import floor
from mmd import rbf_mmd2, median_pairwise_distance, mix_rbf_mmd2_and_ratio

begin = time()

# silence TF's per-op logging; only errors are shown
tf.logging.set_verbosity(tf.logging.ERROR)

# --- get settings --- #
# parse command line arguments, or use defaults
parser = utils.rgan_options_parser()
settings = vars(parser.parse_args())
# if a settings file is specified, it overrides command line arguments/defaults
if settings['settings_file']: settings = utils.load_settings_from_file(settings)

# --- get data, split --- #
# samples, pdf, labels = data_utils.get_data(settings)
data_path = './experiments/data/' + settings['data_load_from'] + '.data.npy'
print('Loading data from', data_path)
# training mode: no anomaly/per-sample evaluation while loading the data
settings["eval_an"] = False
settings["eval_single"] = False
samples, labels, index = data_utils.get_data(settings["data"], settings["seq_length"], settings["seq_step"],
                                             settings["num_signals"], settings['sub_id'], settings["eval_single"],
                                             settings["eval_an"], data_path)
print('samples_size:',samples.shape)
# -- number of variables -- #
num_variables = samples.shape[2]
print('num_variables:', num_variables)
# --- save settings, data --- #
print('Ready to run with settings:')
for (k, v) in settings.items(): print(v, '\t', k)
# add the settings to local environment
# WARNING: at this point a lot of variables appear
# (every settings key -- identifier, batch_size, num_epochs, ... --
# becomes a bare top-level name used below)
locals().update(settings)
# persist the effective settings so AD.py / AD_Invert.py can reload them
json.dump(settings, open('./experiments/settings/' + identifier + '.txt', 'w'), indent=0)

# --- build model --- #
# preparation: data placeholders and model parameters
Z, X, T = model.create_placeholders(batch_size, seq_length, latent_dim, num_variables)
discriminator_vars = ['hidden_units_d', 'seq_length', 'batch_size', 'batch_mean']
discriminator_settings = dict((k, settings[k]) for k in discriminator_vars)
generator_vars = ['hidden_units_g', 'seq_length', 'batch_size', 'learn_scale']
generator_settings = dict((k, settings[k]) for k in generator_vars)
# the generator emits one signal per data variable
generator_settings['num_signals'] = num_variables

# model: GAN losses
D_loss, G_loss = model.GAN_loss(Z, X, generator_settings, discriminator_settings)
# solvers; optionally differentially-private SGD (dp flag)
D_solver, G_solver, priv_accountant = model.GAN_solvers(D_loss, G_loss, learning_rate, batch_size,
                                                        total_examples=samples.shape[0],
                                                        l2norm_bound=l2norm_bound,
                                                        batches_per_lot=batches_per_lot, sigma=dp_sigma, dp=dp)
# model: generate samples for visualization
G_sample = model.generator(Z, **generator_settings, reuse=True)


# # --- evaluation settings--- #
#
# # frequency to do visualisations
# num_samples = samples.shape[0]
# vis_freq = max(6600 // num_samples, 1)
# eval_freq = max(6600// num_samples, 1)
#
# # get heuristic bandwidth for mmd kernel from evaluation samples
# heuristic_sigma_training = median_pairwise_distance(samples)
# best_mmd2_so_far = 1000
#
# # optimise sigma using that (that's t-hat)
# batch_multiplier = 5000 // batch_size
# eval_size = batch_multiplier * batch_size
# eval_eval_size = int(0.2 * eval_size)
# eval_real_PH = tf.placeholder(tf.float32, [eval_eval_size, seq_length, num_generated_features])
# eval_sample_PH = tf.placeholder(tf.float32, [eval_eval_size, seq_length, num_generated_features])
# n_sigmas = 2
# sigma = tf.get_variable(name='sigma', shape=n_sigmas, initializer=tf.constant_initializer(
#     value=np.power(heuristic_sigma_training, np.linspace(-1, 3, num=n_sigmas))))
# mmd2, that = mix_rbf_mmd2_and_ratio(eval_real_PH, eval_sample_PH, sigma)
# with tf.variable_scope("SIGMA_optimizer"):
#     sigma_solver = tf.train.RMSPropOptimizer(learning_rate=0.05).minimize(-that, var_list=[sigma])
#     # sigma_solver = tf.train.AdamOptimizer().minimize(-that, var_list=[sigma])
#     # sigma_solver = tf.train.AdagradOptimizer(learning_rate=0.1).minimize(-that, var_list=[sigma])
# sigma_opt_iter = 2000
# sigma_opt_thresh = 0.001
# sigma_opt_vars = [var for var in tf.global_variables() if 'SIGMA_optimizer' in var.name]


# --- run the program --- #
config = tf.ConfigProto()
# do not pre-allocate all GPU memory up front
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
# sess = tf.Session()
sess.run(tf.global_variables_initializer())

# # -- plot the real samples -- #
# pick 16 random real windows as the "real" reference plots
vis_real_indices = np.random.choice(len(samples), size=16)
vis_real = np.float32(samples[vis_real_indices, :, :])
plotting.save_plot_sample(vis_real, 0, identifier + '_real', n_samples=16, num_epochs=num_epochs)
plotting.save_samples_real(vis_real, identifier)

# --- train --- #
train_vars = ['batch_size', 'D_rounds', 'G_rounds', 'use_time', 'seq_length', 'latent_dim']
train_settings = dict((k, settings[k]) for k in train_vars)
train_settings['num_signals'] = num_variables

t0 = time()
# per-epoch MMD scores; only filled by the (currently commented-out)
# evaluation code below, so this stays all-zero as written
MMD = np.zeros([num_epochs, ])

for epoch in range(num_epochs):
# for epoch in range(1):
    # -- train epoch -- #
    # one pass over the data: D_rounds discriminator updates and G_rounds
    # generator updates per batch (see model.train_epoch)
    D_loss_curr, G_loss_curr = model.train_epoch(epoch, samples, labels, sess, Z, X, D_loss, G_loss,
                                                 D_solver, G_solver, **train_settings)

    # # -- eval -- #
    # # visualise plots of generated samples, with/without labels
    # # choose which epoch to visualize
    #
    # # random input vectors for the latent space, as the inputs of generator
    # vis_ZZ = model.sample_Z(batch_size, seq_length, latent_dim, use_time)
    #
    # # # -- generate samples-- #
    # vis_sample = sess.run(G_sample, feed_dict={Z: vis_ZZ})
    # # # -- visualize the generated samples -- #
    # plotting.save_plot_sample(vis_sample, epoch, identifier, n_samples=16, num_epochs=None, ncol=4)
    # # plotting.save_plot_sample(vis_sample, 0, identifier + '_real', n_samples=16, num_epochs=num_epochs)
    # # # save the generated samples in cased they might be useful for comparison
    # plotting.save_samples(vis_sample, identifier, epoch)

    # -- print -- #
    print('epoch, D_loss_curr, G_loss_curr, seq_length')
    print('%d\t%.4f\t%.4f\t%d' % (epoch, D_loss_curr, G_loss_curr, seq_length))

    # # -- compute mmd2 and if available, prob density -- #
    # if epoch % eval_freq == 0:
    #     # how many samples to evaluate with?
    #     eval_Z = model.sample_Z(eval_size, seq_length, latent_dim, use_time)
    #     eval_sample = np.empty(shape=(eval_size, seq_length, num_signals))
    #     for i in range(batch_multiplier):
    #         eval_sample[i * batch_size:(i + 1) * batch_size, :, :] = sess.run(G_sample, feed_dict={ Z: eval_Z[i * batch_size:(i + 1) * batch_size]})
    #     eval_sample = np.float32(eval_sample)
    #     eval_real = np.float32(samples['vali'][np.random.choice(len(samples['vali']), size=batch_multiplier * batch_size), :, :])
    #
    #     eval_eval_real = eval_real[:eval_eval_size]
    #     eval_test_real = eval_real[eval_eval_size:]
    #     eval_eval_sample = eval_sample[:eval_eval_size]
    #     eval_test_sample = eval_sample[eval_eval_size:]
    #
    #     # MMD
    #     # reset ADAM variables
    #     sess.run(tf.initialize_variables(sigma_opt_vars))
    #     sigma_iter = 0
    #     that_change = sigma_opt_thresh * 2
    #     old_that = 0
    #     while that_change > sigma_opt_thresh and sigma_iter < sigma_opt_iter:
    #         new_sigma, that_np, _ = sess.run([sigma, that, sigma_solver],
    #                                          feed_dict={eval_real_PH: eval_eval_real, eval_sample_PH: eval_eval_sample})
    #         that_change = np.abs(that_np - old_that)
    #         old_that = that_np
    #         sigma_iter += 1
    #     opt_sigma = sess.run(sigma)
    #     try:
    #         mmd2, that_np = sess.run(mix_rbf_mmd2_and_ratio(eval_test_real, eval_test_sample, biased=False, sigmas=sigma))
    #     except ValueError:
    #         mmd2 = 'NA'
    #         that = 'NA'
    #
    #     MMD[epoch, ] = mmd2

    # -- save model parameters -- #
    # one parameter dump per epoch; AD.py picks an epoch's dump by name
    model.dump_parameters(sub_id + '_' + str(seq_length) + '_' + str(epoch), sess)

# NOTE(review): the MMD evaluation above is commented out, so this saves
# an all-zero array.
np.save('./experiments/plots/gs/' + identifier + '_' + 'MMD.npy', MMD)

end = time() - begin
print('Training terminated | Training time=%d s' %(end) )

# NOTE(review): duplicate of the timing message above.
print("Training terminated | training time = %ds  " % (time() - begin))

================================================
FILE: data_utils.py
================================================
import numpy as np
import pandas as pd
import pdb
import re
from time import time
import json
import random

import model

from scipy.spatial.distance import pdist, squareform
from scipy.stats import multivariate_normal, invgamma, mode
from scipy.special import gamma
from scipy.misc.pilutil import imresize
from functools import partial
from math import ceil

from sklearn.metrics.pairwise import rbf_kernel
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing


# --- deal with the SWaT data --- #
def swat(seq_length, seq_step, num_signals, randomize=False):
    """Load the SWaT training set and serialise it into sliding windows.

    seq_length:  window length (points per window).
    seq_step:    stride between consecutive windows.
    num_signals: number of PCA components to keep per window.
    randomize:   unused; kept for interface compatibility.

    Returns (samples, labels) with shapes
    [num_windows, seq_length, num_signals] and [num_windows, seq_length, 1].
    """
    # train = np.load('./data/swat.npy')
    # print('Loaded swat from .npy')
    train = np.loadtxt(open('./data/swat.csv'), delimiter=',')
    print('Loaded swat from .csv')
    m, n = train.shape # m=496800, n=52
    # Normalise each signal column (the last column is the label) to [-1, 1];
    # all-zero columns are left untouched.
    for i in range(n - 1):
        A = max(train[:, i])
        if A != 0:
            # scale to [0, 1], then to [-1, 1]
            train[:, i] /= A
            train[:, i] = 2 * train[:, i] - 1

    # Drop the first 21600 rows (presumably a start-up transient -- TODO confirm).
    samples = train[21600:, 0:n-1]
    labels = train[21600:, n-1]    # the last column is the label

    # -- apply PCA dimension reduction for multi-variate GAN-AD -- #
    from sklearn.decomposition import PCA
    X_n = samples
    # -- the best PC dimension is chosen pc=5 -- #
    n_components = num_signals
    pca = PCA(n_components, svd_solver='full')
    pca.fit(X_n)
    pc = pca.components_

    # projected values on the principal components
    samples = np.matmul(X_n, pc.transpose(1, 0))

    # Slice overlapping windows of length seq_length every seq_step points.
    num_samples = (samples.shape[0]-seq_length)//seq_step
    print("num_samples:", num_samples)
    print("num_signals:", num_signals)
    aa = np.empty([num_samples, seq_length, num_signals])
    bb = np.empty([num_samples, seq_length, 1])

    for j in range(num_samples):
        bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1, 1])
        for i in range(num_signals):
            aa[j, :, i] = samples[(j * seq_step):(j * seq_step + seq_length), i]

    return aa, bb

def swat_birgan(seq_length, seq_step, num_signals, randomize=False):
    """Load the SWaT training set as per-window covariance matrices.

    Each window of length seq_length becomes the [n_vars, n_vars] covariance
    matrix of its signals (bi-rGAN variant).
    Returns (samples, labels) with shapes
    [num_windows, n_vars, n_vars] and [num_windows, seq_length, 1].
    """
    # train = np.load('./data/swat.npy')
    # print('Loaded swat from .npy')
    train = np.loadtxt(open('./data/swat.csv'), delimiter=',')
    print('Loaded swat from .csv')
    m, n = train.shape # m=496800, n=52
    # Normalise each signal column (the last column is the label) to [-1, 1].
    for i in range(n - 1):
        A = max(train[:, i])
        if A != 0:
            train[:, i] /= A
            train[:, i] = 2 * train[:, i] - 1

    # Drop the first 21600 rows (presumably a start-up transient -- TODO confirm).
    samples = train[21600:, 0:n-1]
    labels = train[21600:, n-1]    # the last column is the label

    nn = samples.shape[1]
    num_samples = (samples.shape[0]-seq_length)//seq_step
    aa = np.empty([num_samples, nn, nn])
    AA = np.empty([seq_length, nn])
    bb = np.empty([num_samples, seq_length, 1])

    print('Pre-process training data...')
    for j in range(num_samples):
        # display batch progress
        # BUG FIX: the original called the undefined name `model_bigan`
        # (NameError at runtime); this module imports `model`, which is
        # assumed to provide display_batch_progression -- TODO confirm.
        model.display_batch_progression(j, num_samples)
        bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1, 1])
        for i in range(nn):
            AA[:, i] = samples[(j * seq_step):(j * seq_step + seq_length), i]
        # covariance of the window's signals, variables as rows
        aa[j, :, :] = np.cov(AA.T)

    return aa, bb

def swat_test(seq_length, seq_step, num_signals, randomize=False):
    """Load the SWaT test (attack) set and serialise it into windows.

    Returns (samples, labels, index): PCA-projected windows
    [num_windows, seq_length, num_signals], the per-point anomaly labels
    [num_windows, seq_length, 1], and the original row index of every point
    (used to map detections back to the raw series).
    """
    # test = np.load('./data/swat_a.npy')
    # print('Loaded swat_a from .npy')
    test = np.loadtxt(open('./data/swat_a.csv'), delimiter=',')
    print('Loaded swat_a from .csv')
    m, n = test.shape  # m1=449919, n1=52
    # Normalise each signal column (the last column is the label) to [-1, 1].
    for i in range(n - 1):
        B = max(test[:, i])
        if B != 0:
            test[:, i] /= B
            test[:, i] = 2 * test[:, i] - 1

    samples = test[:, 0:n - 1]
    labels = test[:, n - 1]
    idx = np.asarray(list(range(0, m)))  # record the idx of each point

    # -- apply PCA dimension reduction for multi-variate GAN-AD -- #
    from sklearn.decomposition import PCA
    X_a = samples
    # NOTE(review): PCA is re-fitted on the test data here, independently of
    # the training fit in swat() -- confirm this is intended.
    # -- the best PC dimension is chosen pc=5 -- #
    n_components = num_signals
    pca_a = PCA(n_components, svd_solver='full')
    pca_a.fit(X_a)
    pc_a = pca_a.components_
    # projected values on the principal components
    samples = np.matmul(X_a, pc_a.transpose(1, 0))

    # Slice overlapping windows of length seq_length every seq_step points.
    num_samples_t = (samples.shape[0] - seq_length) // seq_step
    aa = np.empty([num_samples_t, seq_length, num_signals])
    bb = np.empty([num_samples_t, seq_length, 1])
    bbb = np.empty([num_samples_t, seq_length, 1])

    for j in range(num_samples_t):
        bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1, 1])
        bbb[j, :, :] = np.reshape(idx[(j * seq_step):(j * seq_step + seq_length)], [-1, 1])
        for i in range(num_signals):
            aa[j, :, i] = samples[(j * seq_step):(j * seq_step + seq_length), i]

    return aa, bb, bbb


def swat_birgan_test(seq_length, seq_step, num_signals, randomize=False):
    """Load the SWaT test (attack) set as per-window covariance matrices.

    Each window of length seq_length becomes the [n_vars, n_vars] covariance
    matrix of its signals (bi-rGAN variant).
    Returns (samples, labels) with shapes
    [num_windows, n_vars, n_vars] and [num_windows, seq_length, 1].
    """
    test = np.loadtxt(open('./data/swat_a.csv'), delimiter=',')
    print('Loaded swat_a from .csv')
    m, n = test.shape  # m1=449919, n1=52
    # Normalise each signal column (the last column is the label) to [-1, 1].
    for i in range(n - 1):
        B = max(test[:, i])
        if B != 0:
            test[:, i] /= B
            test[:, i] = 2 * test[:, i] - 1

    samples = test[:, 0:n - 1]
    labels = test[:, n - 1]

    nn = samples.shape[1]
    num_samples = (samples.shape[0]-seq_length)//seq_step
    aa = np.empty([num_samples, nn, nn])
    AA = np.empty([seq_length, nn])
    bb = np.empty([num_samples, seq_length, 1])

    print('Pre-process testing data...')
    for j in range(num_samples):
        # display batch progress
        # BUG FIX: the original called the undefined name `model_bigan`
        # (NameError at runtime); this module imports `model`, which is
        # assumed to provide display_batch_progression -- TODO confirm.
        model.display_batch_progression(j, num_samples)
        bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1, 1])
        for i in range(nn):
            AA[:, i] = samples[(j * seq_step):(j * seq_step + seq_length), i]
        # covariance of the window's signals, variables as rows
        aa[j, :, :] = np.cov(AA.T)

    return aa, bb


def wadi(seq_length, seq_step, num_signals, randomize=False):
    """Load the WADI training set and serialise it into sliding windows.

    NOTE: the incoming seq_length is deliberately replaced by 10800 below,
    and each window is then down-sampled by 300, so every returned window
    has 36 points.
    Returns (samples, labels).
    """
    train = np.load('./data/wadi.npy')
    print('Loaded wadi from .npy')
    m, n = train.shape  # m=1048571, n=119
    # Normalise each signal column (the last column is the label) to [-1, 1].
    for i in range(n-1):
        A = max(train[:, i])
        if A != 0:
            train[:, i] /= A
            train[:, i] = 2 * train[:, i] - 1

    samples = train[259200:, 0:n-1]  # normal region only
    labels = train[259200:, n-1]
    # -- hand-picked subset of variables for this dataset -- #
    samples = samples[:, [0, 3, 6, 17]]

    # the incoming seq_length is deliberately overridden for WADI
    seq_length = 10800
    num_samples = (samples.shape[0] - seq_length) // seq_step
    print("num_samples:", num_samples)
    print("num_signals:", num_signals)
    # NOTE(review): the window buffer assumes num_signals == 4 (the subset
    # chosen above) -- confirm callers pass 4.
    aa = np.empty([num_samples, seq_length, num_signals])
    bb = np.empty([num_samples, seq_length, 1])

    for j in range(num_samples):
        bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1, 1])
        for i in range(num_signals):
            aa[j, :, i] = samples[(j * seq_step):(j * seq_step + seq_length), i]

    # keep every 300th point: 10800 raw points -> 36 steps per window
    samples = aa[:, 0:10800:300, :]
    labels = bb[:, 0:10800:300, :]

    return samples, labels


def wadi_test(seq_length, seq_step, num_signals, randomize=False):
    """Load the WADI attack set, normalise it, PCA-project it to
    `num_signals` dimensions and cut it into overlapping windows.

    Args:
        seq_length: length (in time steps) of each window.
        seq_step: stride between consecutive window starts.
        num_signals: number of principal components to keep.
        randomize: unused; kept for interface compatibility.

    Returns:
        samples: array of shape (num_windows, seq_length, num_signals).
        labels:  array of shape (num_windows, seq_length, 1) with the
                 anomaly label of every time step.
        index:   array of shape (num_windows, seq_length, 1) with the
                 original row index of every time step.
    """
    test = np.load('./data/wadi_a.npy')
    print('Loaded wadi_a from .npy')
    m, n = test.shape  # m1=172801, n1=119

    # Normalise every signal column (the last column is the label):
    # scale to [0, 1] by the column max, then map to [-1, 1].
    for i in range(n - 1):
        col_max = max(test[:, i])
        if col_max != 0:
            test[:, i] = 2 * (test[:, i] / col_max) - 1

    samples = test[:, 0:n - 1]
    labels = test[:, n - 1]
    idx = np.arange(m)  # record the original row index of each point
    # -- apply PCA dimension reduction for multi-variate GAN-AD -- #
    from sklearn.decomposition import PCA
    # -- the best PC dimension is chosen pc=8 -- #
    pca_a = PCA(num_signals, svd_solver='full')
    pca_a.fit(samples)
    pc_a = pca_a.components_
    # projected values on the principal components
    samples = np.matmul(samples, pc_a.transpose(1, 0))

    num_samples_t = (samples.shape[0] - seq_length) // seq_step
    aa = np.empty([num_samples_t, seq_length, num_signals])
    bb = np.empty([num_samples_t, seq_length, 1])
    bbb = np.empty([num_samples_t, seq_length, 1])

    for j in range(num_samples_t):
        # BUG FIX: the label window previously started at j * 10 instead of
        # j * seq_step, which misaligned (and for seq_step != 10, crashed)
        # the label windows; kdd99_test uses j * seq_step for all three.
        bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1, 1])
        bbb[j, :, :] = np.reshape(idx[(j * seq_step):(j * seq_step + seq_length)], [-1, 1])
        for i in range(num_signals):
            aa[j, :, i] = samples[(j * seq_step):(j * seq_step + seq_length), i]

    return aa, bb, bbb

def kdd99(seq_length, seq_step, num_signals):
    """Load the KDD99 training set, normalise it, PCA-project it to
    `num_signals` dimensions and cut it into overlapping windows.

    Args:
        seq_length: length (in time steps) of each window.
        seq_step: stride between consecutive window starts.
        num_signals: number of principal components to keep.

    Returns:
        samples: array of shape (num_windows, seq_length, num_signals).
        labels:  array of shape (num_windows, seq_length, 1).
    """
    train = np.load('./data/kdd99_train.npy')
    print('load kdd99_train from .npy')
    m, n = train.shape  # m=562387, n=35
    # Normalise every feature column (the last column is the label):
    # scale to [0, 1] by the column max, then map to [-1, 1].
    for i in range(n - 1):
        col_max = max(train[:, i])  # hoisted: was computed twice per column
        if col_max != 0:
            train[:, i] = 2 * (train[:, i] / col_max) - 1

    samples = train[:, 0:n - 1]
    labels = train[:, n - 1]  # the last column is the label
    # -- apply PCA dimension reduction for multi-variate GAN-AD -- #
    from sklearn.decomposition import PCA
    # -- the best PC dimension is chosen pc=6 -- #
    pca = PCA(num_signals, svd_solver='full')
    pca.fit(samples)
    pc = pca.components_
    # projected values on the principal components
    samples = np.matmul(samples, pc.transpose(1, 0))

    num_samples = (samples.shape[0] - seq_length) // seq_step
    aa = np.empty([num_samples, seq_length, num_signals])
    bb = np.empty([num_samples, seq_length, 1])

    for j in range(num_samples):
        bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1, 1])
        for i in range(num_signals):
            aa[j, :, i] = samples[(j * seq_step):(j * seq_step + seq_length), i]

    return aa, bb

def kdd99_test(seq_length, seq_step, num_signals):
    """Load the KDD99 test set, normalise it, PCA-project it to
    `num_signals` dimensions and cut it into overlapping windows.

    Args:
        seq_length: length (in time steps) of each window.
        seq_step: stride between consecutive window starts.
        num_signals: number of principal components to keep.

    Returns:
        samples: array of shape (num_windows, seq_length, num_signals).
        labels:  array of shape (num_windows, seq_length, 1).
        index:   array of shape (num_windows, seq_length, 1) holding the
                 original row index of every time step.
    """
    test = np.load('./data/kdd99_test.npy')
    print('load kdd99_test from .npy')

    m, n = test.shape  # m1=494021, n1=35

    # Normalise every feature column (the last column is the label):
    # scale to [0, 1] by the column max, then map to [-1, 1].
    for i in range(n - 1):
        col_max = max(test[:, i])  # hoisted: was computed twice per column
        if col_max != 0:
            test[:, i] = 2 * (test[:, i] / col_max) - 1

    samples = test[:, 0:n - 1]
    labels = test[:, n - 1]
    idx = np.arange(m)  # record the original row index of each point
    # -- apply PCA dimension reduction for multi-variate GAN-AD -- #
    from sklearn.decomposition import PCA
    # -- the best PC dimension is chosen pc=6 -- #
    pca_a = PCA(num_signals, svd_solver='full')
    pca_a.fit(samples)
    pc_a = pca_a.components_
    # projected values on the principal components
    samples = np.matmul(samples, pc_a.transpose(1, 0))

    num_samples_t = (samples.shape[0] - seq_length) // seq_step
    aa = np.empty([num_samples_t, seq_length, num_signals])
    bb = np.empty([num_samples_t, seq_length, 1])
    bbb = np.empty([num_samples_t, seq_length, 1])

    for j in range(num_samples_t):
        bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1, 1])
        bbb[j, :, :] = np.reshape(idx[(j * seq_step):(j * seq_step + seq_length)], [-1, 1])
        for i in range(num_signals):
            aa[j, :, i] = samples[(j * seq_step):(j * seq_step + seq_length), i]

    return aa, bb, bbb


# ############################ data pre-processing #################################
# --- to do with loading --- #
# --- to do with loading --- #
def get_samples_and_labels(settings):
    """
    Parse settings options to load or generate correct type of data,
    perform test/train split as necessary, and reform into 'samples' and 'labels'
    dictionaries (each keyed by 'train'/'vali'/'test').

    Also writes corrected values for 'seq_length', 'num_samples',
    'num_signals' (and possibly 'cond_dim', 'max_val') back into settings.
    """
    if settings['data_load_from']:
        data_path = './experiments/data/' + settings['data_load_from'] + '.data.npy'
        print('Loading data from', data_path)
        # NOTE(review): get_data's signature is (data_type, seq_length,
        # seq_step, num_signals, sub_id, ...) and it returns
        # (samples, labels, index); this two-argument call and the
        # (samples, pdf, labels) unpacking look stale -- confirm.
        samples, pdf, labels = get_data('load', data_path)
        train, vali, test = samples['train'], samples['vali'], samples['test']
        train_labels, vali_labels, test_labels = labels['train'], labels['vali'], labels['test']
        del samples, labels
    else:
        # generate the data
        data_vars = ['num_samples', 'num_samples_t','seq_length', 'seq_step', 'num_signals', 'freq_low',
                'freq_high', 'amplitude_low', 'amplitude_high', 'scale', 'full_mnist']
        # NOTE(review): data_settings is built here but never used below.
        data_settings = dict((k, settings[k]) for k in data_vars if k in settings.keys())
        # NOTE(review): get_data returns (samples, labels, index), so `pdf`
        # here receives the labels array and `labels` the index -- verify.
        samples, pdf, labels = get_data(settings['data'], settings['seq_length'], settings['seq_step'], settings['num_signals'], settings['sub_id'])
        if 'multivariate_mnist' in settings and settings['multivariate_mnist']:
            seq_length = samples.shape[1]
            samples = samples.reshape(-1, int(np.sqrt(seq_length)), int(np.sqrt(seq_length)))
        if 'normalise' in settings and settings['normalise']: # TODO this is a mess, fix
            print(settings['normalise'])
            norm = True
        else:
            norm = False
        if labels is None:
            train, vali, test = split(samples, [0.6, 0.2, 0.2], normalise=norm)
            train_labels, vali_labels, test_labels = None, None, None
        else:
            train, vali, test, labels_list = split(samples, [0.6, 0.2, 0.2], normalise=norm, labels=labels)
            train_labels, vali_labels, test_labels = labels_list

    # repackage the three splits into dictionaries
    labels = dict()
    labels['train'], labels['vali'], labels['test'] = train_labels, vali_labels, test_labels

    samples = dict()
    samples['train'], samples['vali'], samples['test'] = train, vali, test

    # futz around with labels
    # TODO refactor cause this is messy
    if 'one_hot' in settings and settings['one_hot'] and not settings['data_load_from']:
        if len(labels['train'].shape) == 1:
            # ASSUME labels go from 0 to max_val inclusive, find max-val
            max_val = int(np.max([labels['train'].max(), labels['test'].max(), labels['vali'].max()]))
            # now we have max_val + 1 dimensions
            print('Setting cond_dim to', max_val + 1, 'from', settings['cond_dim'])
            settings['cond_dim'] = max_val + 1
            print('Setting max_val to 1 from', settings['max_val'])
            settings['max_val'] = 1

            # convert each integer label vector to a one-hot matrix
            labels_oh = dict()
            for (k, v) in labels.items():
                A = np.zeros(shape=(len(v), settings['cond_dim']))
                A[np.arange(len(v)), (v).astype(int)] = 1
                labels_oh[k] = A
            labels = labels_oh
        else:
            assert settings['max_val'] == 1
            # this is already one-hot!

    if 'predict_labels' in settings and settings['predict_labels']:
        samples, labels = data_utils.make_predict_labels(samples, labels)
        print('Setting cond_dim to 0 from', settings['cond_dim'])
        settings['cond_dim'] = 0

    # update the settings dictionary to update erroneous settings
    # (mostly about the sequence length etc. - it gets set by the data!)
    settings['seq_length'] = samples['train'].shape[1]
    settings['num_samples'] = samples['train'].shape[0] + samples['vali'].shape[0] + samples['test'].shape[0]
    settings['num_signals'] = samples['train'].shape[2]

    return samples, pdf, labels


def get_data(data_type, seq_length, seq_step, num_signals, sub_id=None,
             eval_single=None, eval_an=None, data_options=None):
    """
    Helper/wrapper function to get the requested data.

    Args:
        data_type: one of 'load', 'swat', 'swat_test', 'kdd99',
            'kdd99_test', 'wadi', 'wadi_test'.
        seq_length, seq_step, num_signals: windowing parameters forwarded
            to the dataset loaders.
        sub_id, eval_single, eval_an: unused here; given default values so
            callers that omit them (e.g. get_samples_and_labels) still work.
        data_options: path of the .npy dict to read when data_type == 'load'.

    Returns:
        (samples, labels, index); labels and/or index are None when the
        chosen loader does not produce them.

    Raises:
        ValueError: if data_type is not recognised.
    """
    print('data_type:', data_type)  # was print('data_type'): printed the literal
    labels = None
    index = None
    if data_type == 'load':
        # NOTE(review): .item() assumes a pickled dict; numpy >= 1.16.3
        # requires np.load(..., allow_pickle=True) here -- confirm version.
        data_dict = np.load(data_options).item()
        samples = data_dict['samples']
        labels = data_dict['labels']
    elif data_type == 'swat':
        samples, labels = swat(seq_length, seq_step, num_signals)
    elif data_type == 'swat_test':
        samples, labels, index = swat_test(seq_length, seq_step, num_signals)
    elif data_type == 'kdd99':
        samples, labels = kdd99(seq_length, seq_step, num_signals)
    elif data_type == 'kdd99_test':
        samples, labels, index = kdd99_test(seq_length, seq_step, num_signals)
    elif data_type == 'wadi':
        samples, labels = wadi(seq_length, seq_step, num_signals)
    elif data_type == 'wadi_test':
        samples, labels, index = wadi_test(seq_length, seq_step, num_signals)
    else:
        raise ValueError(data_type)
    print('Generated/loaded', len(samples), 'samples from data-type', data_type)
    return samples, labels, index


def get_batch(samples, batch_size, batch_idx, labels=None):
    """Return the batch_idx-th batch of samples (plus matching labels).

    labels may be None (returns (batch, None)), a single ndarray
    (returns (batch, label_batch)), or a 2-tuple of ndarrays
    (returns (batch, first_batch, second_batch)).
    """
    lo = batch_idx * batch_size
    hi = lo + batch_size
    batch = samples[lo:hi]
    if labels is None:
        return batch, None
    if type(labels) is tuple:  # two sets of labels
        assert len(labels) == 2
        first, second = labels
        return batch, first[lo:hi], second[lo:hi]
    assert type(labels) is np.ndarray
    return batch, labels[lo:hi]



def split(samples, proportions, normalise=False, scale=False, labels=None, random_seed=None):
    """
    Return a shuffled train/validation/test split of samples.

    Args:
        samples: array indexed along axis 0.
        proportions: [train, vali, test] fractions; must sum to 1
            (up to floating-point tolerance).
        normalise: if True, apply normalise_data to all three parts
            (mutually exclusive with scale).
        scale: if True, apply scale_data to all three parts.
        labels: optional ndarray (or dict of ndarrays) split with the same
            permutation as samples.
        random_seed: optional seed for a reproducible shuffle.

    Returns:
        (train, vali, test), plus labels_split when labels is given.
    """
    if random_seed is not None:
        random.seed(random_seed)
        np.random.seed(random_seed)
    # tolerance-based check: float sums such as 0.7 + 0.15 + 0.15 are not
    # exactly 1.0, which made the previous `== 1` assert fragile
    assert np.isclose(np.sum(proportions), 1)
    n_total = samples.shape[0]
    n_train = ceil(n_total * proportions[0])
    n_test = ceil(n_total * proportions[2])
    n_vali = n_total - (n_train + n_test)
    # permutation to shuffle the samples
    shuff = np.random.permutation(n_total)
    train_indices = shuff[:n_train]
    vali_indices = shuff[n_train:(n_train + n_vali)]
    test_indices = shuff[(n_train + n_vali):]
    # TODO when we want to scale we can just return the indices
    assert len(set(train_indices).intersection(vali_indices)) == 0
    assert len(set(train_indices).intersection(test_indices)) == 0
    assert len(set(vali_indices).intersection(test_indices)) == 0
    # split up the samples
    train = samples[train_indices]
    vali = samples[vali_indices]
    test = samples[test_indices]
    # apply the same normalisation scheme to all parts of the split
    if normalise:
        if scale: raise ValueError(normalise, scale)  # mutually exclusive
        train, vali, test = normalise_data(train, vali, test)
    elif scale:
        train, vali, test = scale_data(train, vali, test)
    if labels is None:
        return train, vali, test
    print('Splitting labels...')
    if type(labels) == np.ndarray:
        labels_split = [labels[train_indices], labels[vali_indices], labels[test_indices]]
    elif type(labels) == dict:
        # more than one set of labels!  (weird case)
        labels_split = dict()
        for (label_name, label_set) in labels.items():
            labels_split[label_name] = [label_set[train_indices],
                                        label_set[vali_indices],
                                        label_set[test_indices]]
    else:
        raise ValueError(type(labels))
    return train, vali, test, labels_split


================================================
FILE: differential_privacy/dp_sgd/dp_optimizer/dp_optimizer.py
================================================
# (originally from https://github.com/tensorflow/models/tree/master/research/differential_privacy,
# possibly with some small edits by @corcra)

# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Differentially private optimizers.
"""
from __future__ import division

import tensorflow as tf

from differential_privacy.dp_sgd.dp_optimizer import utils
#from differential_privacy.dp_sgd.per_example_gradients import per_example_gradients

import pdb

class DPGradientDescentOptimizer(tf.train.GradientDescentOptimizer):
  """Differentially private gradient descent optimizer.

  Wraps plain SGD so that per-example gradients are sanitized (clipped and,
  optionally, noised) before being applied, optionally accumulating several
  batches into one "lot" before each variable update.
  """

  def __init__(self, learning_rate, eps_delta, sanitizer,
               sigma=None, use_locking=False, name="DPGradientDescent",
               batches_per_lot=1):
    """Construct a differentially private gradient descent optimizer.

    The optimizer uses fixed privacy budget for each batch of training.

    Args:
      learning_rate: for GradientDescentOptimizer.
      eps_delta: EpsDelta pair for each epoch.
      sanitizer: for sanitizing the gradient.
      sigma: noise sigma. If None, use eps_delta pair to compute sigma;
        otherwise use supplied sigma directly.
      use_locking: use locking.
      name: name for the object.
      batches_per_lot: Number of batches in a lot.
    """

    super(DPGradientDescentOptimizer, self).__init__(learning_rate,
                                                     use_locking, name)
    # Also, if needed, define the gradient accumulators
    self._batches_per_lot = batches_per_lot
    self._grad_accum_dict = {}
    if batches_per_lot > 1:
      # One non-trainable shadow accumulator per trainable variable;
      # gradients of all batches in a lot are summed into these.
      self._batch_count = tf.Variable(1, dtype=tf.int32, trainable=False,
                                      name="batch_count")
      var_list = tf.trainable_variables()
      with tf.variable_scope("grad_acc_for"):
        for var in var_list:
          v_grad_accum = tf.Variable(tf.zeros_like(var),
                                     trainable=False,
                                     name=utils.GetTensorOpName(var))
          self._grad_accum_dict[var.name] = v_grad_accum

    self._eps_delta = eps_delta
    self._sanitizer = sanitizer
    self._sigma = sigma

  def compute_sanitized_gradients(self, loss, var_list=None,
                                  add_noise=True):
    """Compute the sanitized gradients.

    Args:
      loss: the loss tensor; assumed to hold one scalar per example along
        axis 0 (it is unstacked below to obtain per-example gradients).
      var_list: the optional variables.
        NOTE(review): var_list=None would fail in the list comprehension
        below; minimize() always supplies one -- confirm no other callers.
      add_noise: if true, then add noise. Always clip.
    Returns:
      the list of sanitized gradients, one per entry of var_list.
    Raises:
      TypeError: if var_list contains non-variable.
    """

    self._assert_valid_dtypes([loss])

    xs = [tf.convert_to_tensor(x) for x in var_list]
    # TODO check this change
    # Differentiate each per-example loss separately; this builds one
    # tf.gradients call per example, which is expensive for large batches.
    loss_list = tf.unstack(loss, axis=0)
    px_grads_byexample = [tf.gradients(l, xs) for l in loss_list]
    # Regroup so that px_grads[v][e] is the gradient of example e's loss
    # with respect to variable xs[v].
    px_grads = [[x[v] for x in px_grads_byexample] for v in range(len(xs))]
    #px_grads = tf.gradients(loss, xs)
    # add a dummy 0th dimension to reflect the fact that we have a batch size of 1...
  #  px_grads = [tf.expand_dims(x, 0) for x in px_grads]
#    px_grads = per_example_gradients.PerExampleGradients(loss, xs)
    sanitized_grads = []
    for px_grad, v in zip(px_grads, var_list):
      tensor_name = utils.GetTensorOpName(v)
      # Clip each per-example gradient and (optionally) add noise;
      # num_examples scales the privacy accounting by the lot size.
      sanitized_grad = self._sanitizer.sanitize(
          px_grad, self._eps_delta, sigma=self._sigma,
          tensor_name=tensor_name, add_noise=add_noise,
          num_examples=self._batches_per_lot * tf.slice(
              tf.shape(px_grad), [0], [1]))
      sanitized_grads.append(sanitized_grad)

    return sanitized_grads

  def minimize(self, loss, global_step=None, var_list=None,
               name=None):
    """Minimize using sanitized gradients.

    This gets a var_list which is the list of trainable variables.
    For each var in var_list, we defined a grad_accumulator variable
    during init. When batches_per_lot > 1, we accumulate the gradient
    update in those. At the end of each lot, we apply the update back to
    the variable. This has the effect that for each lot we compute
    gradients at the point at the beginning of the lot, and then apply one
    update at the end of the lot. In other words, semantically, we are doing
    SGD with one lot being the equivalent of one usual batch of size
    batch_size * batches_per_lot.
    This allows us to simulate larger batches than our memory size would permit.

    The lr and the num_steps are in the lot world.

    Args:
      loss: the loss tensor.
      global_step: the optional global step.
      var_list: the optional variables.
      name: the optional name.
    Returns:
      the operation that runs one step of DP gradient descent.
    """

    # First validate the var_list

    if var_list is None:
      var_list = tf.trainable_variables()
    for var in var_list:
      if not isinstance(var, tf.Variable):
        raise TypeError("Argument is not a variable.Variable: %s" % var)

    # Modification: apply gradient once every batches_per_lot many steps.
    # This may lead to smaller error

    # Fast path: lot == batch, so sanitize and apply immediately.
    if self._batches_per_lot == 1:
      sanitized_grads = self.compute_sanitized_gradients(
          loss, var_list=var_list)

      grads_and_vars = list(zip(sanitized_grads, var_list))
      self._assert_valid_dtypes([v for g, v in grads_and_vars if g is not None])

      apply_grads = self.apply_gradients(grads_and_vars,
                                         global_step=global_step, name=name)
      return apply_grads

    # Condition for deciding whether to accumulate the gradient
    # or actually apply it.
    # we use a private self_batch_count to keep track of number of batches.
    # global step will count number of lots processed.

    update_cond = tf.equal(tf.constant(0),
                           tf.mod(self._batch_count,
                                  tf.constant(self._batches_per_lot)))

    # Things to do for batches other than last of the lot.
    # Add non-noisy clipped grads to shadow variables.

    def non_last_in_lot_op(loss, var_list):
      """Ops to do for a typical batch.

      For a batch that is not the last one in the lot, we simply compute the
      sanitized gradients and apply them to the grad_acc variables.

      Args:
        loss: loss function tensor
        var_list: list of variables
      Returns:
        A tensorflow op to do the updates to the gradient accumulators
      """
      # Noise is only added once per lot (in last_in_lot_op), so plain
      # clipped gradients are accumulated here.
      sanitized_grads = self.compute_sanitized_gradients(
          loss, var_list=var_list, add_noise=False)

      update_ops_list = []
      for var, grad in zip(var_list, sanitized_grads):
        grad_acc_v = self._grad_accum_dict[var.name]
        update_ops_list.append(grad_acc_v.assign_add(grad))
      update_ops_list.append(self._batch_count.assign_add(1))
      return tf.group(*update_ops_list)

    # Things to do for last batch of a lot.
    # Add noisy clipped grads to accumulator.
    # Apply accumulated grads to vars.

    def last_in_lot_op(loss, var_list, global_step):
      """Ops to do for last batch in a lot.

      For the last batch in the lot, we first add the sanitized gradients to
      the gradient acc variables, and then apply these
      values over to the original variables (via an apply gradient)

      Args:
        loss: loss function tensor
        var_list: list of variables
        global_step: optional global step to be passed to apply_gradients
      Returns:
        A tensorflow op to push updates from shadow vars to real vars.
      """

      # We add noise in the last lot. This is why we need this code snippet
      # that looks almost identical to the non_last_op case here.
      sanitized_grads = self.compute_sanitized_gradients(
          loss, var_list=var_list, add_noise=True)

      normalized_grads = []
      for var, grad in zip(var_list, sanitized_grads):
        grad_acc_v = self._grad_accum_dict[var.name]
        # To handle the lr difference per lot vs per batch, we divide the
        # update by number of batches per lot.
        normalized_grad = tf.div(grad_acc_v.assign_add(grad),
                                 tf.to_float(self._batches_per_lot))

        normalized_grads.append(normalized_grad)

      with tf.control_dependencies(normalized_grads):
        grads_and_vars = list(zip(normalized_grads, var_list))
        self._assert_valid_dtypes(
            [v for g, v in grads_and_vars if g is not None])
        apply_san_grads = self.apply_gradients(grads_and_vars,
                                               global_step=global_step,
                                               name="apply_grads")

      # Now reset the accumulators to zero
      resets_list = []
      with tf.control_dependencies([apply_san_grads]):
        for _, acc in self._grad_accum_dict.items():
          reset = tf.assign(acc, tf.zeros_like(acc))
          resets_list.append(reset)
      resets_list.append(self._batch_count.assign_add(1))

      last_step_update = tf.group(*([apply_san_grads] + resets_list))
      return last_step_update
    # pylint: disable=g-long-lambda
    # Branch on whether this batch completes the lot (batch_count starts
    # at 1, so the condition fires every batches_per_lot-th batch).
    update_op = tf.cond(update_cond,
                        lambda: last_in_lot_op(
                            loss, var_list,
                            global_step),
                        lambda: non_last_in_lot_op(
                            loss, var_list))
    return tf.group(update_op)


================================================
FILE: differential_privacy/dp_sgd/dp_optimizer/sanitizer.py
================================================
# (originally from https://github.com/tensorflow/models/tree/master/research/differential_privacy,
# possibly with some small edits by @corcra)
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Defines Sanitizer class for sanitizing tensors.

A sanitizer first limits the sensitivity of a tensor and then adds noise
to the tensor. The parameters are determined by the privacy_spending and the
other parameters. It also uses an accountant to keep track of the privacy
spending.
"""
from __future__ import division

import collections

import tensorflow as tf

from differential_privacy.dp_sgd.dp_optimizer import utils

import pdb

ClipOption = collections.namedtuple("ClipOption",
                                    ["l2norm_bound", "clip"])


class AmortizedGaussianSanitizer(object):
  """Sanitizer with Gaussian noise and amortized privacy spending accounting.

  This sanitizes a tensor by first clipping the tensor, summing the tensor
  and then adding appropriate amount of noise. It also uses an amortized
  accountant to keep track of privacy spending.
  """

  def __init__(self, accountant, default_option):
    """Construct an AmortizedGaussianSanitizer.

    Args:
      accountant: the privacy accountant. Expect an amortized one.
      default_option: the default ClipOption, used when sanitize() is not
        given an explicit one and no per-tensor option is registered.
    """

    self._accountant = accountant
    self._default_option = default_option
    self._options = {}  # per-tensor ClipOption overrides, keyed by name

  def set_option(self, tensor_name, option):
    """Set options for an individual tensor.

    Args:
      tensor_name: the name of the tensor.
      option: clip option.
    """

    self._options[tensor_name] = option

  def sanitize(self, x, eps_delta, sigma=None,
               option=ClipOption(None, None), tensor_name=None,
               num_examples=None, add_noise=True):
    """Sanitize the given tensor.

    This sanitizes a given tensor by first applying l2 norm clipping and
    then adding Gaussian noise. It calls the privacy accountant for updating
    the privacy spending.

    Args:
      x: the tensor to sanitize.
      eps_delta: a pair of eps, delta for (eps,delta)-DP. Use it to
        compute sigma if sigma is None.
      sigma: if sigma is not None, use sigma.
      option: a ClipOption which, if supplied, used for
        clipping and adding noise.
      tensor_name: the name of the tensor.
      num_examples: if None, use the number of "rows" of x.
      add_noise: if True, then add noise, else just clip.
    Returns:
      the sanitized tensor (x summed over axis 0); when noise is added,
      the privacy-spending accumulation op runs via a control dependency.
    """
    if sigma is None:
      # pylint: disable=unpacking-non-sequence
      eps, delta = eps_delta
      with tf.control_dependencies(
          [tf.Assert(tf.greater(eps, 0),
                     ["eps needs to be greater than 0"]),
           tf.Assert(tf.greater(delta, 0),
                     ["delta needs to be greater than 0"])]):
        # The following formula is taken from
        #   Dwork and Roth, The Algorithmic Foundations of Differential
        #   Privacy, Appendix A.
        #   http://www.cis.upenn.edu/~aaroth/Papers/privacybook.pdf
        sigma = tf.sqrt(2.0 * tf.log(1.25 / delta)) / eps

    # Resolve the clip option: explicit argument > per-tensor override >
    # constructor default.
    l2norm_bound, clip = option
    if l2norm_bound is None:
      l2norm_bound, clip = self._default_option
      if ((tensor_name is not None) and
          (tensor_name in self._options)):
        l2norm_bound, clip = self._options[tensor_name]
    if clip:
      x = utils.BatchClipByL2norm(x, l2norm_bound)

    if add_noise:
      if num_examples is None:
        num_examples = tf.slice(tf.shape(x), [0], [1])
      privacy_accum_op = self._accountant.accumulate_privacy_spending(
          eps_delta, sigma, num_examples)
      # The accounting op must run before the noisy sum is produced.
      with tf.control_dependencies([privacy_accum_op]):
        saned_x = utils.AddGaussianNoise(tf.reduce_sum(x, 0),
                                         sigma * l2norm_bound)
    else:
      saned_x = tf.reduce_sum(x, 0)
    return saned_x


================================================
FILE: differential_privacy/dp_sgd/dp_optimizer/utils.py
================================================
# (originally from https://github.com/tensorflow/models/tree/master/research/differential_privacy,
# possibly with some small edits by @corcra)

# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Utils for building and training NN models.
"""
from __future__ import division

import math

import numpy
import tensorflow as tf


class LayerParameters(object):
  """Hyper-parameters describing one fully-connected (non-conv) layer."""

  def __init__(self):
    # identification and size
    self.name = ""
    self.num_units = 0
    # NOTE(review): the leading underscore is inconsistent with
    # ConvParameters.with_bias -- confirm which name callers use.
    self._with_bias = False
    self.relu = False
    # per-layer gradient clipping bounds (0.0 means "use the default")
    self.gradient_l2norm_bound = 0.0
    self.bias_gradient_l2norm_bound = 0.0
    # training behaviour
    self.trainable = True
    self.weight_decay = 0.0


class ConvParameters(object):
  """Hyper-parameters describing one convolutional layer."""

  def __init__(self):
    # convolution geometry
    self.patch_size = 5
    self.stride = 1
    self.in_channels = 1
    self.out_channels = 0
    self.in_size = 28
    # bias and activation
    self.with_bias = True
    self.bias_stddev = 0.1
    self.relu = True
    # pooling applied after the convolution
    self.max_pool = True
    self.max_pool_size = 2
    self.max_pool_stride = 2
    # training behaviour and bookkeeping
    self.trainable = False
    self.name = ""
    self.num_outputs = 0


# Parameters for a layered neural network.
class NetworkParameters(object):
  """Hyper-parameters describing the overall model structure."""

  def __init__(self):
    self.input_size = 0
    # input projection: one of 'NONE', 'RANDOM', 'PCA'
    self.projection_type = 'NONE'
    self.projection_dimensions = 0
    # fallback clipping bound for layers that do not set their own
    self.default_gradient_l2norm_bound = 0.0
    self.layer_parameters = []  # list of LayerParameters
    self.conv_parameters = []   # list of ConvParameters


def GetTensorOpName(x):
  """Strip the trailing ':<output-index>' from a tensor's name.

  Useful for naming related tensors, as ':' in name field of op is not
  permitted.

  Args:
    x: the input tensor.
  Returns:
    the name of the op that produced x.
  """
  # rsplit(":", 1) yields [name] when there is no colon (so [0] is the
  # whole name) and [op_name, output_index] otherwise.
  return x.name.rsplit(":", 1)[0]


def BuildNetwork(inputs, network_parameters):
  """Build a network using the given parameters.

  Args:
    inputs: a Tensor of floats containing the input data.
    network_parameters: NetworkParameters object
      that describes the parameters for the network.
  Returns:
    outputs, projection, training_parameters: where outputs (a tensor) is
      the output of the network, projection is the non-trainable projection
      variable (None when projection_type is "NONE"), and
      training_parameters (a dictionary that maps the name of each variable
      to a dictionary of parameters) is the parameters used during training.
  """

  training_parameters = {}
  num_inputs = network_parameters.input_size
  outputs = inputs
  projection = None

  # First apply convolutions, if needed
  for conv_param in network_parameters.conv_parameters:
    outputs = tf.reshape(
        outputs,
        [-1, conv_param.in_size, conv_param.in_size,
         conv_param.in_channels])
    conv_weights_name = "%s_conv_weight" % (conv_param.name)
    conv_bias_name = "%s_conv_bias" % (conv_param.name)
    conv_std_dev = 1.0 / (conv_param.patch_size
                          * math.sqrt(conv_param.in_channels))
    conv_weights = tf.Variable(
        tf.truncated_normal([conv_param.patch_size,
                             conv_param.patch_size,
                             conv_param.in_channels,
                             conv_param.out_channels],
                            stddev=conv_std_dev),
        trainable=conv_param.trainable,
        name=conv_weights_name)
    conv_bias = tf.Variable(
        tf.truncated_normal([conv_param.out_channels],
                            stddev=conv_param.bias_stddev),
        trainable=conv_param.trainable,
        name=conv_bias_name)
    # Conv variables are registered with empty parameter dictionaries:
    # no clip bounds or weight decay are attached to them here.
    training_parameters[conv_weights_name] = {}
    training_parameters[conv_bias_name] = {}
    conv = tf.nn.conv2d(outputs, conv_weights,
                        strides=[1, conv_param.stride,
                                 conv_param.stride, 1],
                        padding="SAME")
    relud = tf.nn.relu(conv + conv_bias)
    mpd = tf.nn.max_pool(relud, ksize=[1,
                                       conv_param.max_pool_size,
                                       conv_param.max_pool_size, 1],
                         strides=[1, conv_param.max_pool_stride,
                                  conv_param.max_pool_stride, 1],
                         padding="SAME")
    outputs = mpd
    num_inputs = conv_param.num_outputs
    # this should equal
    # in_size * in_size * out_channels / (stride * max_pool_stride)

  # once all the convs are done, reshape to make it flat
  outputs = tf.reshape(outputs, [-1, num_inputs])

  # Now project, if needed.
  # Fixed: was `is not "NONE"` — identity comparison against a string
  # literal only works by accident of CPython interning; use inequality.
  if network_parameters.projection_type != "NONE":
    projection = tf.Variable(tf.truncated_normal(
        [num_inputs, network_parameters.projection_dimensions],
        stddev=1.0 / math.sqrt(num_inputs)), trainable=False, name="projection")
    num_inputs = network_parameters.projection_dimensions
    outputs = tf.matmul(outputs, projection)

  # Now apply any other layers

  for layer_parameters in network_parameters.layer_parameters:
    num_units = layer_parameters.num_units
    hidden_weights_name = "%s_weight" % (layer_parameters.name)
    hidden_weights = tf.Variable(
        tf.truncated_normal([num_inputs, num_units],
                            stddev=1.0 / math.sqrt(num_inputs)),
        name=hidden_weights_name, trainable=layer_parameters.trainable)
    training_parameters[hidden_weights_name] = {}
    if layer_parameters.gradient_l2norm_bound:
      training_parameters[hidden_weights_name]["gradient_l2norm_bound"] = (
          layer_parameters.gradient_l2norm_bound)
    if layer_parameters.weight_decay:
      training_parameters[hidden_weights_name]["weight_decay"] = (
          layer_parameters.weight_decay)

    outputs = tf.matmul(outputs, hidden_weights)
    if layer_parameters.with_bias:
      hidden_biases_name = "%s_bias" % (layer_parameters.name)
      # NOTE(review): biases are created without trainable=
      # layer_parameters.trainable, so they are always trainable even when
      # the weights are frozen — presumably intentional; confirm.
      hidden_biases = tf.Variable(tf.zeros([num_units]),
                                  name=hidden_biases_name)
      training_parameters[hidden_biases_name] = {}
      if layer_parameters.bias_gradient_l2norm_bound:
        training_parameters[hidden_biases_name][
            "bias_gradient_l2norm_bound"] = (
                layer_parameters.bias_gradient_l2norm_bound)

      outputs += hidden_biases
    if layer_parameters.relu:
      outputs = tf.nn.relu(outputs)
    # num_inputs for the next layer is num_units in the current layer.
    num_inputs = num_units

  return outputs, projection, training_parameters


def VaryRate(start, end, saturate_epochs, epoch):
  """Compute a linearly varying number.

  Decrease linearly from start to end until epoch saturate_epochs.

  Args:
    start: the initial number.
    end: the end number.
    saturate_epochs: after this we do not reduce the number; if less than
      or equal to zero, just return start.
    epoch: the current learning epoch.
  Returns:
    the calculated number.
  """
  if saturate_epochs <= 0:
    return start
  if epoch >= saturate_epochs:
    return end
  # Fixed: with saturate_epochs == 1 the step denominator below would be
  # zero; only epoch 0 reaches this point, and it gets the start value.
  if saturate_epochs == 1:
    return start
  step = (start - end) / (saturate_epochs - 1)
  return start - step * epoch


def BatchClipByL2norm(t, upper_bound, name=None):
  """Clip each dimension-0 slice of `t` to L2 norm at most `upper_bound`.

  Each row (one example of the batch) is rescaled so that its L2 norm does
  not exceed `upper_bound`; rows already within the bound are untouched.

  Args:
    t: the input tensor.
    upper_bound: the upperbound of the L2 norm.
    name: optional name.
  Returns:
    the clipped tensor.
  """

  assert upper_bound > 0
  with tf.name_scope(values=[t, upper_bound], name=name,
                     default_name="batch_clip_by_l2norm") as name:
    original_shape = tf.shape(t)
    num_examples = tf.slice(original_shape, [0], [1])
    # Flatten everything except the batch dimension.
    flat = tf.reshape(t, tf.concat(axis=0, values=[num_examples, [-1]]))
    bound_inv = tf.fill(tf.slice(original_shape, [0], [1]),
                        tf.constant(1.0/upper_bound))
    # Small epsilon guards against dividing by zero on all-zero rows.
    inv_norms = tf.rsqrt(tf.reduce_sum(flat * flat, [1]) + 0.000001)
    # Rows whose norm is within the bound get scale 1; larger rows shrink.
    row_scale = tf.minimum(inv_norms, bound_inv) * upper_bound
    scaled = tf.matmul(tf.diag(row_scale), flat)
    return tf.reshape(scaled, original_shape, name=name)


def SoftThreshold(t, threshold_ratio, name=None):
  """Soft-threshold a tensor by the mean value.

  Softthreshold each dimension-0 vector (for matrix it is each column) by
  the mean of absolute value multiplied by the threshold_ratio factor. Here
  we soft threshold each column as it corresponds to each unit in a layer.

  Args:
    t: the input tensor.
    threshold_ratio: the threshold ratio.
    name: the optional name for the returned tensor.
  Returns:
    the thresholded tensor, where each entry is soft-thresholded by
    threshold_ratio times the mean of the absolute value of each column.
  """

  assert threshold_ratio >= 0
  with tf.name_scope(values=[t, threshold_ratio], name=name,
                     default_name="soft_thresholding") as name:
    saved_shape = tf.shape(t)
    # Fixed: the trailing marker must be the rank-1 tensor [-1], not the
    # scalar -1 — tf.concat requires operands of equal rank. This mirrors
    # the flatten in BatchClipByL2norm.
    t2 = tf.reshape(t, tf.concat(axis=0,
                                 values=[tf.slice(saved_shape, [0], [1]),
                                         [-1]]))
    t_abs = tf.abs(t2)
    # Shrink every entry toward zero by threshold_ratio * (column mean |t|).
    t_x = tf.sign(t2) * tf.nn.relu(t_abs -
                                   (tf.reduce_mean(t_abs, [0],
                                                   keep_dims=True) *
                                    threshold_ratio))
    return tf.reshape(t_x, saved_shape, name=name)


def AddGaussianNoise(t, sigma, name=None):
  """Add i.i.d. Gaussian noise N(0, sigma^2) to every entry of t.

  Args:
    t: the input tensor.
    sigma: the stddev of the Gaussian noise.
    name: optional name.
  Returns:
    the noisy tensor.
  """

  with tf.name_scope(values=[t, sigma], name=name,
                     default_name="add_gaussian_noise") as name:
    # Sample noise with the same shape as the input and add it on.
    noise = tf.random_normal(tf.shape(t), stddev=sigma)
    result = t + noise
  return result


def GenerateBinomialTable(m):
  """Generate binomial table.

  Args:
    m: the size of the table.
  Returns:
    A two dimensional array T where T[i][j] = (i choose j),
    for 0<= i, j <=m.
  """

  table = numpy.zeros((m + 1, m + 1), dtype=numpy.float64)
  # (i choose 0) == 1 for every row.
  table[:, 0] = 1.0
  # Pascal's rule: (i choose j) = (i-1 choose j) + (i-1 choose j-1).
  for row in range(1, m + 1):
    for col in range(1, m + 1):
      value = table[row - 1, col] + table[row - 1, col - 1]
      # Guard against float64 overflow for large m.
      assert not math.isnan(value) and not math.isinf(value)
      table[row, col] = value
  return tf.convert_to_tensor(table)


================================================
FILE: differential_privacy/privacy_accountant/tf/accountant.py
================================================
# (originally from https://github.com/tensorflow/models/tree/master/research/differential_privacy,
# possibly with some small edits by @corcra)

# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Defines Accountant class for keeping track of privacy spending.

A privacy accountant keeps track of privacy spendings. It has methods
accumulate_privacy_spending and get_privacy_spent. Here we only define
AmortizedAccountant which tracks the privacy spending in the amortized
way. It uses privacy amplification via sampling to compute the privacy
spending for each batch and strong composition (specialized for Gaussian
noise) to accumulate the privacy spending.
"""
from __future__ import division

import abc
import collections
import math
import sys

import numpy
import tensorflow as tf

from differential_privacy.dp_sgd.dp_optimizer import utils

EpsDelta = collections.namedtuple("EpsDelta", ["spent_eps", "spent_delta"])

import pdb

# TODO(liqzhang) To ensure the same API for AmortizedAccountant and
# MomentsAccountant, we pass the union of arguments to both, so we
# have unused_sigma for AmortizedAccountant and unused_eps_delta for
# MomentsAccountant. Consider to revise the API to avoid the unused
# arguments.  It would be good to use @abc.abstractmethod, etc, to
# define the common interface as a base class.
class AmortizedAccountant(object):
  """Keep track of privacy spending in an amortized way.

  AmortizedAccountant accumulates the privacy spending by assuming
  all the examples are processed uniformly at random so the spending is
  amortized among all the examples. And we assume that we use Gaussian noise
  so the accumulation is on eps^2 and delta, using advanced composition.
  """

  def __init__(self, total_examples):
    """Initialization. Currently only support amortized tracking.

    Args:
      total_examples: total number of examples.
    """

    assert total_examples > 0
    self._total_examples = total_examples
    # Running accumulators updated by accumulate_privacy_spending. These are
    # tf.Variables, so they must be initialized (e.g. via a global variables
    # initializer) before either method is used.
    self._eps_squared_sum = tf.Variable(tf.zeros([1]), trainable=False,
                                        name="eps_squared_sum")
    self._delta_sum = tf.Variable(tf.zeros([1]), trainable=False,
                                  name="delta_sum")

  def accumulate_privacy_spending(self, eps_delta, unused_sigma,
                                  num_examples):
    """Accumulate the privacy spending.

    Currently only support approximate privacy. Here we assume we use Gaussian
    noise on randomly sampled batch so we get better composition: 1. the per
    batch privacy is computed using privacy amplication via sampling bound;
    2. the composition is done using the composition with Gaussian noise.
    TODO(liqzhang) Add a link to a document that describes the bounds used.

    Args:
      eps_delta: EpsDelta pair which can be tensors.
      unused_sigma: the noise sigma. Unused for this accountant.
      num_examples: the number of examples involved.
    Returns:
      a TensorFlow operation for updating the privacy spending.
    """

    eps, delta = eps_delta
    # The runtime assert below makes the update ops depend on delta > 0;
    # a non-positive delta would make the amortized bound meaningless.
    with tf.control_dependencies(
        [tf.Assert(tf.greater(delta, 0),
                   ["delta needs to be greater than 0"])]):
      # Fraction of the dataset touched by this batch.
      amortize_ratio = (tf.cast(num_examples, tf.float32) * 1.0 /
                        self._total_examples)
      # Use privacy amplification via sampling bound.
      # See Lemma 2.2 in http://arxiv.org/pdf/1405.7085v2.pdf
      # TODO(liqzhang) Add a link to a document with formal statement
      # and proof.
      amortize_eps = tf.reshape(tf.log(1.0 + amortize_ratio * (
          tf.exp(eps) - 1.0)), [1])
      amortize_delta = tf.reshape(amortize_ratio * delta, [1])
      # Advanced composition for Gaussian noise: accumulate eps^2 and delta.
      return tf.group(*[tf.assign_add(self._eps_squared_sum,
                                      tf.square(amortize_eps)),
                        tf.assign_add(self._delta_sum, amortize_delta)])

  def get_privacy_spent(self, sess, target_eps=None):
    """Report the spending so far.

    Args:
      sess: the session to run the tensor.
      target_eps: the target epsilon. Unused.
    Returns:
      the list containing a single EpsDelta, with values as Python floats (as
      opposed to numpy.float64). This is to be consistent with
      MomentAccountant which can return a list of (eps, delta) pair.
    """

    # pylint: disable=unused-argument
    unused_target_eps = target_eps
    # Both accumulators are shape-[1] arrays; math.sqrt / float() collapse
    # them to Python scalars.
    eps_squared_sum, delta_sum = sess.run([self._eps_squared_sum,
                                           self._delta_sum])
    return [EpsDelta(math.sqrt(eps_squared_sum), float(delta_sum))]


class MomentsAccountant(object):
  """Privacy accountant which keeps track of moments of privacy loss.

  Note: The constructor of this class creates tf.Variables that must
  be initialized with tf.global_variables_initializer() or similar calls.

  MomentsAccountant accumulates the high moments of the privacy loss. It
  requires a method for computing differenital moments of the noise (See
  below for the definition). So every specific accountant should subclass
  this class by implementing _differential_moments method.

  Denote by X_i the random variable of privacy loss at the i-th step.
  Consider two databases D, D' which differ by one item. X_i takes value
  log Pr[M(D')==x]/Pr[M(D)==x] with probability Pr[M(D)==x].
  In MomentsAccountant, we keep track of y_i(L) = log E[exp(L X_i)] for some
  large enough L. To compute the final privacy spending,  we apply Chernoff
  bound (assuming the random noise added at each step is independent) to
  bound the total privacy loss Z = sum X_i as follows:
    Pr[Z > e] = Pr[exp(L Z) > exp(L e)]
              < E[exp(L Z)] / exp(L e)
              = Prod_i E[exp(L X_i)] / exp(L e)
              = exp(sum_i log E[exp(L X_i)]) / exp(L e)
              = exp(sum_i y_i(L) - L e)
  Hence the mechanism is (e, d)-differentially private for
    d =  exp(sum_i y_i(L) - L e).
  We require d < 1, i.e. e > sum_i y_i(L) / L. We maintain y_i(L) for several
  L to compute the best d for any give e (normally should be the lowest L
  such that 2 * sum_i y_i(L) / L < e.

  We further assume that at each step, the mechanism operates on a random
  sample with sampling probability q = batch_size / total_examples. Then
    E[exp(L X)] = E[(Pr[M(D)==x / Pr[M(D')==x])^L]
  By distinguishing two cases of whether D < D' or D' < D, we have
  that
    E[exp(L X)] <= max (I1, I2)
  where
    I1 = (1-q) E ((1-q) + q P(X+1) / P(X))^L + q E ((1-q) + q P(X) / P(X-1))^L
    I2 = E (P(X) / ((1-q) + q P(X+1)))^L

  In order to compute I1 and I2, one can consider to
    1. use an asymptotic bound, which recovers the advance composition theorem;
    2. use the closed formula (like GaussianMomentsAccountant);
    3. use numerical integration or random sample estimation.

  Dependent on the distribution, we can often obtain a tigher estimation on
  the moments and hence a more accurate estimation of the privacy loss than
  obtained using generic composition theorems.

  """

  __metaclass__ = abc.ABCMeta

  def __init__(self, total_examples, moment_orders=32):
    """Initialize a MomentsAccountant.

    Args:
      total_examples: total number of examples.
      moment_orders: the order of moments to keep.
    """

    assert total_examples > 0
    self._total_examples = total_examples
    # moment_orders may be given either as an explicit list/tuple of orders,
    # or as a count N meaning that orders 1..N are tracked.
    self._moment_orders = (moment_orders
                           if isinstance(moment_orders, (list, tuple))
                           else range(1, moment_orders + 1))
    self._max_moment_order = max(self._moment_orders)
    assert self._max_moment_order < 100, "The moment order is too large."
    # One float64 accumulator per tracked moment order; these are
    # tf.Variables and must be initialized before use (see class docstring).
    self._log_moments = [tf.Variable(numpy.float64(0.0),
                                     trainable=False,
                                     name=("log_moments-%d" % moment_order))
                         for moment_order in self._moment_orders]

  @abc.abstractmethod
  def _compute_log_moment(self, sigma, q, moment_order):
    """Compute high moment of privacy loss.

    Args:
      sigma: the noise sigma, in the multiples of the sensitivity.
      q: the sampling ratio.
      moment_order: the order of moment.
    Returns:
      log E[exp(moment_order * X)]
    """
    pass

  def accumulate_privacy_spending(self, unused_eps_delta,
                                  sigma, num_examples):
    """Accumulate privacy spending.

    In particular, accounts for privacy spending when we assume there
    are num_examples, and we are releasing the vector
    (sum_{i=1}^{num_examples} x_i) + Normal(0, stddev=l2norm_bound*sigma)
    where l2norm_bound is the maximum l2_norm of each example x_i, and
    the num_examples have been randomly selected out of a pool of
    self.total_examples.

    Args:
      unused_eps_delta: EpsDelta pair which can be tensors. Unused
        in this accountant.
      sigma: the noise sigma, in the multiples of the sensitivity (that is,
        if the l2norm sensitivity is k, then the caller must have added
        Gaussian noise with stddev=k*sigma to the result of the query).
      num_examples: the number of examples involved.
    Returns:
      a TensorFlow operation for updating the privacy spending.
    """
    # q: probability that any given example is included in this batch.
    q = tf.cast(num_examples, tf.float64) * 1.0 / self._total_examples

    # Log moments compose additively across steps, so each step just adds
    # its per-step log moment into the accumulator for every order.
    moments_accum_ops = []
    for i in range(len(self._log_moments)):
      moment = self._compute_log_moment(sigma, q, self._moment_orders[i])
      moments_accum_ops.append(tf.assign_add(self._log_moments[i], moment))
    return tf.group(*moments_accum_ops)

  def _compute_delta(self, log_moments, eps):
    """Compute delta for given log_moments and eps.

    Args:
      log_moments: the log moments of privacy loss, in the form of pairs
        of (moment_order, log_moment)
      eps: the target epsilon.
    Returns:
      delta
    """
    min_delta = 1.0
    for moment_order, log_moment in log_moments:
      # Skip non-finite accumulators: they carry no usable bound.
      if math.isinf(log_moment) or math.isnan(log_moment):
        sys.stderr.write("The %d-th order is inf or Nan\n" % moment_order)
        continue
      # Chernoff bound: delta <= exp(log_moment - moment_order * eps).
      # Only orders where this is below 1 can tighten min_delta.
      if log_moment < moment_order * eps:
        min_delta = min(min_delta,
                        math.exp(log_moment - moment_order * eps))
    return min_delta

  def _compute_eps(self, log_moments, delta):
    """Compute the smallest eps achieving `delta` over all tracked moments.

    Args:
      log_moments: pairs of (moment_order, log_moment).
      delta: the target delta.
    Returns:
      epsilon (float; may be inf if no finite moment is available).
    """
    min_eps = float("inf")
    for moment_order, log_moment in log_moments:
      # Skip non-finite accumulators, mirroring _compute_delta.
      if math.isinf(log_moment) or math.isnan(log_moment):
        sys.stderr.write("The %d-th order is inf or Nan\n" % moment_order)
        continue
      # Inverting the Chernoff bound for eps at this moment order.
      min_eps = min(min_eps, (log_moment - math.log(delta)) / moment_order)
    return min_eps

  def get_privacy_spent(self, sess, target_eps=None, target_deltas=None):
    """Compute privacy spending in (e, d)-DP form for a single or list of eps.

    Args:
      sess: the session to run the tensor.
      target_eps: a list of target epsilon's for which we would like to
        compute corresponding delta value.
      target_deltas: a list of target deltas for which we would like to
        compute the corresponding eps value. Caller must specify
        either target_eps or target_delta.
    Returns:
      A list of EpsDelta pairs.
    """
    # Exactly one of target_eps / target_deltas must be provided.
    assert (target_eps is None) ^ (target_deltas is None)
    eps_deltas = []
    log_moments = sess.run(self._log_moments)
    # Pair every tracked order with its accumulated log moment.
    log_moments_with_order = numpy.array(list(zip(self._moment_orders, log_moments)))
    if target_eps is not None:
      for eps in target_eps:
        delta = self._compute_delta(log_moments_with_order, eps)
        eps_deltas.append(EpsDelta(eps, delta))
    else:
      assert target_deltas
      for delta in target_deltas:
        eps_deltas.append(
            EpsDelta(self._compute_eps(log_moments_with_order, delta), delta))
    return eps_deltas


class GaussianMomentsAccountant(MomentsAccountant):
  """MomentsAccountant which assumes Gaussian noise.

  GaussianMomentsAccountant assumes the noise added is centered Gaussian
  noise N(0, sigma^2 I). In this case, we can compute the differential moments
  accurately using a formula.

  For asymptotic bound, for Gaussian noise with variance sigma^2, we can show
  for L < sigma^2,  q L < sigma,
    log E[exp(L X)] = O(q^2 L^2 / sigma^2).
  Using this we derive that for training T epoches, with batch ratio q,
  the Gaussian mechanism with variance sigma^2 (with q < 1/sigma) is (e, d)
  private for d = exp(T/q q^2 L^2 / sigma^2 - L e). Setting L = sigma^2,
  Tq = e/2, the mechanism is (e, exp(-e sigma^2/2))-DP. Equivalently, the
  mechanism is (e, d)-DP if sigma = sqrt{2 log(1/d)}/e, q < 1/sigma,
  and T < e/(2q). This bound is better than the bound obtained using general
  composition theorems, by an Omega(sqrt{log k}) factor on epsilon, if we run
  k steps. Since we use direct estimate, the obtained privacy bound has tight
  constant.

  For GaussianMomentAccountant, it suffices to compute I1, as I1 >= I2,
  which reduce to computing E(P(x+s)/P(x+s-1) - 1)^i for s = 0 and 1. In the
  companion gaussian_moments.py file, we supply procedure for computing both
  I1 and I2 (the computation of I2 is through multi-precision integration
  package). It can be verified that indeed I1 >= I2 for wide range of parameters
  we have tried, though at the moment we are unable to prove this claim.

  We recommend that when using this accountant, users independently verify
  using gaussian_moments.py that for their parameters, I1 is indeed larger
  than I2. This can be done by following the instructions in
  gaussian_moments.py.
  """

  def __init__(self, total_examples, moment_orders=32):
    """Initialization.

    Args:
      total_examples: total number of examples.
      moment_orders: the order of moments to keep.
    """
    # Fixed: was super(self.__class__, self) — that form recurses forever
    # if this class is subclassed, because self.__class__ would then be the
    # subclass; always name the class explicitly in the 2-argument form.
    super(GaussianMomentsAccountant, self).__init__(total_examples,
                                                    moment_orders)
    self._binomial_table = utils.GenerateBinomialTable(self._max_moment_order)

  def _differential_moments(self, sigma, s, t):
    """Compute 0 to t-th differential moments for Gaussian variable.

        E[(P(x+s)/P(x+s-1)-1)^t]
      = sum_{i=0}^t (t choose i) (-1)^{t-i} E[(P(x+s)/P(x+s-1))^i]
      = sum_{i=0}^t (t choose i) (-1)^{t-i} E[exp(-i*(2*x+2*s-1)/(2*sigma^2))]
      = sum_{i=0}^t (t choose i) (-1)^{t-i} exp(i(i+1-2*s)/(2 sigma^2))
    Args:
      sigma: the noise sigma, in the multiples of the sensitivity.
      s: the shift.
      t: 0 to t-th moment.
    Returns:
      0 to t-th moment as a tensor of shape [t+1].
    """
    assert t <= self._max_moment_order, ("The order of %d is out "
                                         "of the upper bound %d."
                                         % (t, self._max_moment_order))
    binomial = tf.slice(self._binomial_table, [0, 0],
                        [t + 1, t + 1])
    # signs[i, j] = (-1)^{i-j}, realized without exponentiation.
    signs = numpy.zeros((t + 1, t + 1), dtype=numpy.float64)
    for i in range(t + 1):
      for j in range(t + 1):
        signs[i, j] = 1.0 - 2 * ((i - j) % 2)
    exponents = tf.constant([j * (j + 1.0 - 2.0 * s) / (2.0 * sigma * sigma)
                             for j in range(t + 1)], dtype=tf.float64)
    # x[i, j] = binomial[i, j] * signs[i, j] = (i choose j) * (-1)^{i-j}
    x = tf.multiply(binomial, signs)
    # y[i, j] = x[i, j] * exp(exponents[j])
    #         = (i choose j) * (-1)^{i-j} * exp(j(j-1)/(2 sigma^2))
    # Note: this computation is done by broadcasting pointwise multiplication
    # between [t+1, t+1] tensor and [t+1] tensor.
    y = tf.multiply(x, tf.exp(exponents))
    # z[i] = sum_j y[i, j]
    #      = sum_j (i choose j) * (-1)^{i-j} * exp(j(j-1)/(2 sigma^2))
    z = tf.reduce_sum(y, 1)
    return z

  def _compute_log_moment(self, sigma, q, moment_order):
    """Compute high moment of privacy loss.

    Args:
      sigma: the noise sigma, in the multiples of the sensitivity.
      q: the sampling ratio.
      moment_order: the order of moment.
    Returns:
      log E[exp(moment_order * X)]
    """
    assert moment_order <= self._max_moment_order, ("The order of %d is out "
                                                    "of the upper bound %d."
                                                    % (moment_order,
                                                       self._max_moment_order))
    # Row `moment_order` of Pascal's triangle: the binomial coefficients.
    binomial_table = tf.slice(self._binomial_table, [moment_order, 0],
                              [1, moment_order + 1])
    # qs = [1 q q^2 ... q^L] = exp([0 1 2 ... L] * log(q))
    qs = tf.exp(tf.constant([i * 1.0 for i in range(moment_order + 1)],
                            dtype=tf.float64) * tf.cast(
                                tf.log(q), dtype=tf.float64))
    moments0 = self._differential_moments(sigma, 0.0, moment_order)
    term0 = tf.reduce_sum(binomial_table * qs * moments0)
    moments1 = self._differential_moments(sigma, 1.0, moment_order)
    term1 = tf.reduce_sum(binomial_table * qs * moments1)
    # Combine the shifted and unshifted terms (the I1 bound from the class
    # docstring) and return its logarithm.
    return tf.squeeze(tf.log(tf.cast(q * term0 + (1.0 - q) * term1,
                                     tf.float64)))


class DummyAccountant(object):
  """An accountant that does no accounting."""

  def accumulate_privacy_spending(self, *unused_args):
    """Ignore the spending; return a no-op so training graphs still compose."""
    return tf.no_op()

  def get_privacy_spent(self, unused_sess, **unused_kwargs):
    """Report the weakest possible guarantee: eps = infinity, delta = 1."""
    return [EpsDelta(numpy.inf, 1.0)]


================================================
FILE: eugenium_mmd.py
================================================
'''
Code taken from: https://github.com/eugenium/mmd
(modified slightly for efficiency/PEP by Stephanie Hyland)

Python implementation of MMD and Covariance estimates for Relative MMD

Some code is based on code from Vincent Van Asch 
which is based on matlab code from Arthur Gretton


Eugene Belilovsky
eugene.belilovsky@inria.fr
'''
import numpy as np
import scipy as sp
from numpy import sqrt
from sklearn.metrics.pairwise import rbf_kernel
from functools import partial
import pdb

def my_kernel(X, Y, sigma):
    """RBF kernel matrix between the samples of X and Y.

    Supports 2-D inputs (samples, features) and 3-D inputs
    (samples, time, features); in the 3-D case the squared distance is the
    Frobenius norm over the trailing two axes.

    Args:
        X: array with X.shape[0] samples, ndim 2 or 3.
        Y: array with the same trailing shape as X.
        sigma: kernel bandwidth.
    Returns:
        Array K of shape (len(X), len(Y)) with
        K[i, j] = exp(-||X_i - Y_j||^2 / (2 sigma^2)).
    Raises:
        ValueError: if the inputs are not 2- or 3-dimensional.
    """
    gamma = 1 / (2 * sigma**2)
    if len(X.shape) == 2:
        X_sqnorms = np.einsum('...i,...i', X, X)
        Y_sqnorms = np.einsum('...i,...i', Y, Y)
        XY = np.einsum('ia,ja', X, Y)
    elif len(X.shape) == 3:
        X_sqnorms = np.einsum('...ij,...ij', X, X)
        Y_sqnorms = np.einsum('...ij,...ij', Y, Y)
        XY = np.einsum('iab,jab', X, Y)
    else:
        # Fixed: was pdb.set_trace() — never drop into the debugger from
        # library code; report the unsupported shape instead.
        raise ValueError("my_kernel expects 2-D or 3-D inputs, got ndim=%d"
                         % len(X.shape))
    Kxy = np.exp(-gamma*(X_sqnorms.reshape(-1, 1) - 2*XY + Y_sqnorms.reshape(1, -1)))
    return Kxy

def MMD_3_Sample_Test(X, Y, Z, sigma=-1, SelectSigma=True, computeMMDs=False):
    '''Performs the relative MMD test: a statistic for whether Y is closer
    to X than Z is. See http://arxiv.org/pdf/1511.04581.pdf
    The bandwidth heuristic is based on the median heuristic (see Smola, Gretton).

    Args:
        X: reference sample.
        Y: first candidate sample.
        Z: second candidate sample.
        sigma: kernel bandwidth; if negative it is chosen by heuristic.
        SelectSigma: if True, average the pairwise median heuristics of
            (X, Y) and (X, Z); otherwise use a pooled-sample heuristic.
        computeMMDs: if True, also compute the unbiased MMD estimates.
    Returns:
        (pvalue, tstat, sigma, MMDXY, MMDXZ); the last two are None unless
        computeMMDs is True.
    '''
    if(sigma<0):
        #Similar heuristics
        if SelectSigma:
            siz=np.min((1000, X.shape[0]))
            sigma1=kernelwidthPair(X[0:siz], Y[0:siz]);
            sigma2=kernelwidthPair(X[0:siz], Z[0:siz]);
            sigma=(sigma1+sigma2)/2.
        else:
            siz=np.min((1000, X.shape[0]*3))
            # Fixed: siz/3 is a float under Python 3 and raises TypeError
            # when used as a slice index; use floor division.
            Zem=np.r_[X[0:siz//3], Y[0:siz//3], Z[0:siz//3]]
            sigma=kernelwidth(Zem);

    #kernel = partial(rbf_kernel, gamma=1.0/(sigma**2))
    kernel = partial(my_kernel, sigma=sigma)
    #kernel = partial(grbf, sigma=sigma)

    Kyy = kernel(Y, Y)
    Kzz = kernel(Z, Z)
    Kxy = kernel(X, Y)
    Kxz = kernel(X, Z)

    # Zero the diagonals for the unbiased within-sample averages.
    Kyynd = Kyy-np.diag(np.diagonal(Kyy))
    Kzznd = Kzz-np.diag(np.diagonal(Kzz))
    m = Kxy.shape[0];
    n = Kyy.shape[0];
    r = Kzz.shape[0];    

    
    u_yy=np.sum(Kyynd)*( 1./(n*(n-1)) )
    u_zz=np.sum(Kzznd)*( 1./(r*(r-1)) )
    u_xy=np.sum(Kxy)/(m*n)
    u_xz=np.sum(Kxz)/(m*r)
    #Compute the test statistic
    t=u_yy - 2.*u_xy - (u_zz-2.*u_xz)
    Diff_Var, Diff_Var_z2, data=MMD_Diff_Var(Kyy, Kzz, Kxy, Kxz)

    # One-sided p-value under the asymptotic normal approximation.
    pvalue=sp.stats.norm.cdf(-t/np.sqrt((Diff_Var)))
  #  pvalue_z2=sp.stats.norm.cdf(-t/np.sqrt((Diff_Var_z2)))
    tstat=t/sqrt(Diff_Var)
    
    if(computeMMDs):
         Kxx = kernel(X, X)
         Kxxnd = Kxx-np.diag(np.diagonal(Kxx))
         u_xx=np.sum(Kxxnd)*( 1./(m*(m-1)) )
         MMDXY=u_xx+u_yy-2.*u_xy
         MMDXZ=u_xx+u_zz-2.*u_xz
    else:
         MMDXY=None
         MMDXZ=None
    return pvalue, tstat, sigma, MMDXY, MMDXZ
    
def MMD_Diff_Var(Kyy, Kzz, Kxy, Kxz):
    '''Variance of the difference statistic MMDXY - MMDXZ.

    See the appendix of http://arxiv.org/pdf/1511.04581.pdf for the
    derivation of the zeta terms computed below.

    Returns (Var, Var_z2, data): the first-order variance, the variance
    including the second-order correction, and a dict of intermediates.
    '''
    m = Kxy.shape[0]
    n = Kyy.shape[0]
    r = Kzz.shape[0]

    # Zero the diagonals: the unbiased within-sample estimators must
    # exclude the i == j terms.
    Kyynd = Kyy-np.diag(np.diagonal(Kyy))
    Kzznd = Kzz-np.diag(np.diagonal(Kzz))

    u_yy=np.sum(Kyynd)*( 1./(n*(n-1)) )
    u_zz=np.sum(Kzznd)*( 1./(r*(r-1)) )
    u_xy=np.sum(Kxy)/(m*n)
    u_xz=np.sum(Kxz)/(m*r)

    # First-order (zeta1) covariance components t1..t9.
    t1=(1./n**3)*np.sum(Kyynd.T.dot(Kyynd))-u_yy**2
    t2=(1./(n**2*m))*np.sum(Kxy.T.dot(Kxy))-u_xy**2
    t3=(1./(n*m**2))*np.sum(Kxy.dot(Kxy.T))-u_xy**2
    t4=(1./r**3)*np.sum(Kzznd.T.dot(Kzznd))-u_zz**2
    t5=(1./(r*m**2))*np.sum(Kxz.dot(Kxz.T))-u_xz**2
    t6=(1./(r**2*m))*np.sum(Kxz.T.dot(Kxz))-u_xz**2
    t7=(1./(n**2*m))*np.sum(Kyynd.dot(Kxy.T))-u_yy*u_xy
    t8=(1./(n*m*r))*np.sum(Kxy.T.dot(Kxz))-u_xz*u_xy
    t9=(1./(r**2*m))*np.sum(Kzznd.dot(Kxz.T))-u_zz*u_xz

    zeta1=(t1+t2+t3+t4+t5+t6-2.*(t7+t8+t9))
    # Second-order (zeta2) component.
    zeta2=(1/m/(m-1))*np.sum((Kyynd-Kzznd-Kxy.T-Kxy+Kxz+Kxz.T)**2)-(u_yy - 2.*u_xy - (u_zz-2.*u_xz))**2

    data = {'t1': t1, 't2': t2, 't3': t3, 't4': t4, 't5': t5,
            't6': t6, 't7': t7, 't8': t8, 't9': t9,
            'zeta1': zeta1, 'zeta2': zeta2}
    # TODO: a more precise estimator of zeta2 exists (see the matlab
    # derivation referenced in the paper's appendix).

    Var=(4.*(m-2)/(m*(m-1)))*zeta1
    Var_z2=Var+(2./(m*(m-1)))*zeta2

    return Var, Var_z2, data
def grbf(x1, x2, sigma):
    '''Calculates the Gaussian radial base function kernel'''
    # Squared Euclidean distances through the expansion
    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b, computed by broadcasting.
    sqnorms1 = np.sum(x1 * x1, 1).reshape(-1, 1)
    sqnorms2 = np.sum(x2 * x2, 1).reshape(1, -1)
    h = sqnorms1 + sqnorms2
    h = h - 2*np.dot(x1, x2.transpose())
    h = np.array(h, dtype=float)
    return np.exp(-1.*h/(2.*pow(sigma, 2)))
    
    
def kernelwidthPair(x1, x2):
    '''Implementation of the median heuristic. See Gretton 2012
       Pick sigma such that the exponent of exp(- ||x-y|| / (2*sigma2)),
       in other words ||x-y|| / (2*sigma2),  equals 1 for the median distance x
       and y of all distances between points from both data sets X and Y.
    '''
    num_x1 = x1.shape[0]
    num_x2 = x2.shape[0]

    # Pairwise squared distances via ||a-b||^2 = ||a||^2 + ||b||^2 - 2 a.b
    sq1 = np.sum(x1 * x1, 1)
    sq2 = np.sum(x2 * x2, 1)
    dists = np.tile(sq1, (num_x2, 1)).transpose() + np.tile(sq2, (num_x1, 1))
    dists = dists - 2 * np.dot(x1, x2.transpose())
    dists = np.array(dists, dtype=float)

    # Median over the non-zero entries (zero entries are self-distances).
    mdist = np.median([d for d in dists.flat if d])

    sigma = sqrt(mdist / 2.0)
    # guard against a degenerate zero bandwidth
    return sigma if sigma else 1
def kernelwidth(Zmed):
    '''Alternative median heuristic when we cant partition the points
    '''
    count = Zmed.shape[0]
    sqnorms = np.expand_dims(np.sum(Zmed * Zmed, axis=1), 1)

    # (count, count) grid of ||a||^2 + ||b||^2, built with Kronecker products.
    pair_sums = np.kron(np.ones((1, count)), sqnorms) + np.kron(np.ones((count, 1)), sqnorms.T)

    # Subtract cross terms to obtain pairwise squared distances.
    dists = np.array(pair_sums - 2. * Zmed.dot(Zmed.T), dtype=float)

    # Median over non-zero entries (the diagonal is all zeros).
    mdist = np.median([d for d in dists.flat if d])

    sigma = sqrt(mdist / 2.0)
    # guard against a degenerate zero bandwidth
    return sigma if sigma else 1
    
    

def MMD_unbiased(Kxx, Kyy, Kxy):
#The estimate when distribution of x is not equal to y
    m = Kxx.shape[0]
    n = Kyy.shape[0]

    # Off-diagonal means of the within-sample kernel matrices.
    within_x = (1./(m*(m-1))) * np.sum(Kxx - np.diag(np.diagonal(Kxx)))
    within_y = (1./(n*(n-1))) * np.sum(Kyy - np.diag(np.diagonal(Kyy)))
    # Mean of the cross-sample kernel, doubled.
    cross = (2./(m*n)) * np.sum(Kxy)

    # Unbiased MMD^2 estimate.
    return within_x - cross + within_y


================================================
FILE: eval.py
================================================
#!/usr/bin/env ipython
# Evaluation of models
#

import json
import pdb
import numpy as np
import pandas as pd
from eugenium_mmd import MMD_3_Sample_Test
from scipy.stats import ks_2samp
import mmd
from sklearn.svm import SVC
from sklearn.metrics import classification_report, precision_recall_fscore_support, accuracy_score, roc_auc_score, average_precision_score
from sklearn.ensemble import RandomForestClassifier
import sklearn
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

# for keras
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.backend import clear_session

import model
import data_utils
import plotting

import pickle

def assert_same_data(A, B):
    """Verify experiments A and B used the same dataset; return its path."""
    a_loads = A['data'] == 'load'
    b_loads = B['data'] == 'load'
    if a_loads and b_loads:
        # both experiments loaded a shared dataset
        assert A['data_load_from'] == B['data_load_from']
        source = A['data_load_from']
    elif a_loads:
        # A loaded the data that experiment B generated
        assert A['data_load_from'] == B['identifier']
        source = A['data_load_from']
    elif b_loads:
        # B loaded the data that experiment A generated
        assert B['data_load_from'] == A['identifier']
        source = A['identifier']
    else:
        # neither experiment loaded data: nothing shared to compare
        raise ValueError(A['data'], B['data'])
    return './experiments/data/' + source

def model_memorisation(identifier, epoch, max_samples=2000, tstr=False):
    """
    Compare samples from a model against training set and validation set in mmd

    Runs the relative MMD three-sample test between model samples, the test
    set and a subsample of the training set, using a shared median-heuristic
    kernel bandwidth computed on all three sets pooled together.

    identifier: experiment name used to locate settings/data/sample files
    epoch: model epoch to sample from (or whose TSTR samples to load)
    max_samples: cap on how many test points / model samples are compared
    tstr: if True, load pre-generated TSTR samples instead of sampling

    Returns (pvalue, tstat, sigma) from MMD_3_Sample_Test.
    """
    if tstr:
        print('Loading data from TSTR experiment (not sampling from model)')
        # load pre-generated samples
        synth_data = np.load('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy').item()
        model_samples = synth_data['samples']
        synth_labels = synth_data['labels']
        # load real data used in that experiment
        real_data = np.load('./experiments/data/' + identifier + '.data.npy').item()
        real_samples = real_data['samples']
        train = real_samples['train']
        test = real_samples['test']
        n_samples = test.shape[0]
        # NOTE(review): unlike the other branches, this one does not cap
        # n_samples at max_samples -- confirm whether that is intentional
        if model_samples.shape[0] > n_samples:
            model_samples = np.random.permutation(model_samples)[:n_samples]
        print('Data loaded successfully!')
    else:
        if identifier == 'cristobal_eICU':
            # hard-coded placeholder path; this branch is not runnable as-is
            model_samples = pickle.load(open('REDACTED', 'rb'))
            samples, labels = data_utils.eICU_task()
            train = samples['train'].reshape(-1,16,4)
            vali = samples['vali'].reshape(-1,16,4)
            test = samples['test'].reshape(-1,16,4)
            #train_targets = labels['train']
            #vali_targets = labels['vali']
            #test_targets = labels['test']
            train, vali, test = data_utils.scale_data(train, vali, test)
            n_samples = test.shape[0]
            if n_samples > max_samples:
                n_samples = max_samples
                test = np.random.permutation(test)[:n_samples]
            if model_samples.shape[0] > n_samples:
                model_samples = np.random.permutation(model_samples)[:n_samples]
        elif identifier == 'cristobal_MNIST':
            # hard-coded placeholder path; this branch is not runnable as-is
            the_dir = 'REDACTED'
            # pick a random one
            which = np.random.choice(['NEW_OK_', '_r4', '_r5', '_r6', '_r7'])
            model_samples, model_labels = pickle.load(open(the_dir + 'synth_mnist_minist_cdgan_1_2_100_multivar_14_nolr_rdim3_0_2_' + which + '_190.pk', 'rb'))
            # get test and train...
            # (generated with fixed seed...)
            mnist_resized_dim = 14
            samples, labels = data_utils.load_resized_mnist(mnist_resized_dim)
            proportions = [0.6, 0.2, 0.2]
            train, vali, test, labels_split = data_utils.split(samples, labels=labels, random_seed=1, proportions=proportions)
            np.random.seed()  # re-seed from entropy after the fixed-seed split
            train = train.reshape(-1, 14, 14)
            test = test.reshape(-1, 14, 14)
            vali = vali.reshape(-1, 14, 14)
            n_samples = test.shape[0]
            if n_samples > max_samples:
                n_samples = max_samples
                test = np.random.permutation(test)[:n_samples]
            if model_samples.shape[0] > n_samples:
                model_samples = np.random.permutation(model_samples)[:n_samples]
        else:
            # standard case: sample fresh data from the trained model
            settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r'))
            # get the test, train sets
            data = np.load('./experiments/data/' + identifier + '.data.npy').item()
            train = data['samples']['train']
            test = data['samples']['test']
            n_samples = test.shape[0]
            if n_samples > max_samples:
                n_samples = max_samples
                test = np.random.permutation(test)[:n_samples]
            model_samples = model.sample_trained_model(settings, epoch, n_samples)
    # shared kernel bandwidth: median pairwise distance over the pooled sets
    all_samples = np.vstack([train, test, model_samples])
    heuristic_sigma = mmd.median_pairwise_distance(all_samples)
    print('heuristic sigma:', heuristic_sigma)
    pvalue, tstat, sigma, MMDXY, MMDXZ = MMD_3_Sample_Test(model_samples, test, np.random.permutation(train)[:n_samples], sigma=heuristic_sigma, computeMMDs=False)
    #pvalue, tstat, sigma, MMDXY, MMDXZ = MMD_3_Sample_Test(model_samples, np.random.permutation(train)[:n_samples], test, sigma=heuristic_sigma, computeMMDs=False)
#    if pvalue < 0.05:
#        print('At confidence level 0.05, we reject the null hypothesis that MMDXY <= MMDXZ, and conclude that the test data has a smaller MMD with the true data than the generated data')
        # the function takes (X, Y, Z) as its first arguments, it's testing if MMDXY (i.e. MMD between model and train) is less than MMDXZ (MMd between model and test)
#    else:
#        print('We have failed to reject the null hypothesis that MMDXY <= MMDXZ, and cannot conclu#de that the test data has a smaller MMD with the true data than the generated data')
    return pvalue, tstat, sigma

def model_comparison(identifier_A, identifier_B, epoch_A=99, epoch_B=99):
    """
    Compare two models using relative MMD test

    Samples from both trained models, then runs the three-sample MMD test on
    the shared validation data to decide which model's samples are closer to
    the real distribution.

    Returns (pvalue, tstat, sigma, MMDXY, MMDXZ).
    """
    # make sure they used the same data
    settings_A = json.load(open('./experiments/settings/' + identifier_A + '.txt', 'r'))
    settings_B = json.load(open('./experiments/settings/' + identifier_B + '.txt', 'r'))
    data_path = assert_same_data(settings_A, settings_B)
    # now load the data (validation split only)
    data = np.load(data_path + '.data.npy').item()['samples']['vali']
    n_samples = data.shape[0]
    A_samples = model.sample_trained_model(settings_A, epoch_A, n_samples)
    B_samples = model.sample_trained_model(settings_B, epoch_B, n_samples)
    # do the comparison
    # TODO: support multiple signals
    ## some notes about this test:
    ## MMD_3_Sample_Test(X, Y, Z) tests the hypothesis that Px is closer to Pz than Py
    ## that is, test the null hypothesis H0:
    ##   MMD(F, Px, Py) <= MMD(F, Px, Pz)
    ## versus the alternate hypothesis:
    ##   MMD(F, Px, Py) > MMD(F, Px, Pz)
    ## at significance level that we select later (just the threshold on the p-value)
    # only the first signal/channel ([:, :, 0]) is compared (see TODO above)
    pvalue, tstat, sigma, MMDXY, MMDXZ = MMD_3_Sample_Test(data[:, :, 0], A_samples[:, :, 0], B_samples[:, :, 0], computeMMDs=True)
    print(pvalue, tstat, sigma)
    if pvalue < 0.05:
        print('At confidence level 0.05, we reject the null hypothesis that MMDXY <= MMDXZ, and conclude that', identifier_B, 'has a smaller MMD with the true data than', identifier_A)
    else:
        print('We have failed to reject the null hypothesis that MMDXY <= MMDXZ, and cannot conclude that', identifier_B, 'has a smaller MMD with the true data than', identifier_A)
    return pvalue, tstat, sigma, MMDXY, MMDXZ

# --- to do with reconstruction --- #

def get_reconstruction_errors(identifier, epoch, g_tolerance=0.05, max_samples=1000, rerun=False, tstr=False):
    """
    Get the reconstruction error of every point in the training set of a given
    experiment.

    Computes (or loads cached) per-sample reconstruction errors for the train,
    test and generated sets, compares the error distributions with two-sample
    KS tests, then plots the distributions and the easiest/hardest training
    samples.

    identifier: experiment name used to locate settings/data files
    epoch: model epoch to invert through
    g_tolerance: generator tolerance passed to the inversion
    max_samples: cap on the number of train/test samples inverted
    rerun: if True, ignore any cached errors and recompute
    tstr: if True, use pre-generated TSTR samples (with labels) as the
          generated set instead of sampling fresh data from the model

    Returns True; results are saved to disk and plotted as side effects.

    Fix: removed a leftover pdb.set_trace() debugging breakpoint that halted
    execution after the first KS test on every fresh computation.
    """
    settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r'))
    # the experiment may have loaded its data from another experiment
    if settings['data_load_from']:
        data_dict = np.load('./experiments/data/' + settings['data_load_from'] + '.data.npy').item()
    else:
        data_dict = np.load('./experiments/data/' + identifier + '.data.npy').item()
    samples = data_dict['samples']
    train = samples['train']
    vali = samples['vali']
    test = samples['test']
    labels = data_dict['labels']
    train_labels, test_labels, synth_labels, vali_labels = None, None, None, None
    try:
        if rerun:
            # force recomputation by pretending the cache is missing
            raise FileNotFoundError
        errors = np.load('./experiments/eval/' + identifier + '_' + str(epoch) + '_' + str(g_tolerance) + '.reconstruction_errors.npy').item()
        train_errors = errors['train']
        test_errors = errors['test']
        generated_errors = errors['generated']
        noisy_errors = errors['noisy']
        print('Loaded precomputed errors')
    except FileNotFoundError:
        if tstr:
            # use pre-generated (conditional) samples and carry labels along
            synth_data = np.load('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy').item()
            generated = synth_data['samples']
            synth_labels = synth_data['labels']
            train_labels = labels['train']
            test_labels = labels['test']
            vali_labels = labels['vali']
        else:
            # generate new data
            n_eval = 500
            # generate "easy" samples from the distribution
            generated = model.sample_trained_model(settings, epoch, n_eval)
            # generate "hard' random samples, not from train/test distribution
            # TODO: use original validation examples, add noise etc.
        ##    random_samples = np.random.normal(size=generated.shape)
        #    random_samples -= np.mean(random_samples, axis=0) 
        #    random_samples += np.mean(vali, axis=0)
        #    random_samples /= np.std(random_samples, axis=0)
        #    random_samples *= np.std(vali, axis=0)

        # get all the errors
        print('Getting reconstruction errors on train set')
        if train.shape[0] > max_samples:
            # subsample, keeping labels aligned when present
            index_subset = np.random.permutation(train.shape[0])[:max_samples]
            train = train[index_subset]
            if train_labels is not None:
                train_labels = train_labels[index_subset]
        train_errors = error_per_sample(identifier, epoch, train, n_rep=5, g_tolerance=g_tolerance, C_samples=train_labels)
        print('Getting reconstruction errors on test set')
        if test.shape[0] > max_samples:
            index_subset = np.random.permutation(test.shape[0])[:max_samples]
            test = test[index_subset]
            if test_labels is not None:
                test_labels = test_labels[index_subset]
        test_errors = error_per_sample(identifier, epoch, test, n_rep=5, g_tolerance=g_tolerance, C_samples=test_labels)
        D_test, p_test = ks_2samp(train_errors, test_errors)
        print('KS statistic and p-value for train v. test erors:', D_test, p_test)
        print('Getting reconstruction errors on generated set')
        generated_errors = error_per_sample(identifier, epoch, generated, n_rep=5, g_tolerance=g_tolerance, C_samples=synth_labels)
        D_gen, p_gen = ks_2samp(generated_errors, train_errors)
        print('KS statistic and p-value for train v. gen erors:', D_gen, p_gen)
        D_gentest, p_gentest = ks_2samp(generated_errors, test_errors)
        print('KS statistic and p-value for gen v. test erors:', D_gentest, p_gentest)
#        print('Getting reconstruction errors on noisy set')
#        alpha = 0.5
#        noisy_samples = alpha*vali + (1-alpha)*np.random.permutation(vali)
#        noisy_errors = error_per_sample(identifier, epoch, noisy_samples, n_rep=5, g_tolerance=g_tolerance, C_samples=vali_labels)
        noisy_errors = None
        # save!
        errors = {'train': train_errors, 'test': test_errors, 'generated': generated_errors, 'noisy': noisy_errors}
        np.save('./experiments/eval/' + identifier + '_' + str(epoch) + '_' + str(g_tolerance) + '.reconstruction_errors.npy', errors)
    # do two-sample Kolomogorov-Smirnov test for equality
    D_test, p_test = ks_2samp(train_errors, test_errors)
    print('KS statistic and p-value for train v. test erors:', D_test, p_test)
    D_gen, p_gen = ks_2samp(generated_errors, train_errors)
    print('KS statistic and p-value for train v. gen erors:', D_gen, p_gen)
    D_gentest, p_gentest = ks_2samp(generated_errors, test_errors)
    print('KS statistic and p-value for gen v. test erors:', D_gentest, p_gentest)
    # visualise distribution of errors for train and test
    plotting.reconstruction_errors(identifier + '_' + str(epoch) + '_' + str(g_tolerance), train_errors, test_errors, generated_errors, noisy_errors)
    # visualise the "hardest" and "easiest" samples from train
    ranking_train = np.argsort(train_errors)
    easiest_train = ranking_train[:6]
    hardest_train = ranking_train[-6:]
    plotting.save_plot_sample(train[easiest_train], epoch, identifier + '_easytrain', n_samples=6, num_epochs=None, ncol=2)
    plotting.save_plot_sample(train[hardest_train], epoch, identifier + '_hardtrain', n_samples=6, num_epochs=None, ncol=2)
    # visualise the "hardest" and "easiest" samples from random
#    ranking_random = np.argsort(noisy_errors)
#    easiest_random = ranking_random[:6]
#    hardest_random = ranking_random[-6:]
#    plotting.save_plot_sample(random_samples[easiest_random], epoch, identifier + '_easyrandom', n_samples=6, num_epochs=None, ncol=2)
#    plotting.save_plot_sample(random_samples[hardest_random], epoch, identifier + '_hardrandom', n_samples=6, num_epochs=None, ncol=2)
    return True

def error_per_sample(identifier, epoch, samples, n_rep=3, n_iter=None, g_tolerance=0.025, use_min=True, C_samples=None):
    """
    Per-sample reconstruction error, aggregated over n_rep inversion runs.

    Inverts the samples through the model n_rep times and reduces the runs
    with min (default) or mean.
    """
    num_samples = samples.shape[0]
    # median-heuristic kernel bandwidth for the inversion objective
    heuristic_sigma = np.float32(mmd.median_pairwise_distance(samples))
    per_rep = np.zeros(shape=(num_samples, n_rep))
    for rep_idx in range(n_rep):
        _, rep_errors, _ = model.invert(identifier, epoch, samples, n_iter=n_iter, heuristic_sigma=heuristic_sigma, g_tolerance=g_tolerance, C_samples=C_samples)
        per_rep[:, rep_idx] = rep_errors
    # reduce across repetitions: optimistic (min) or average (mean)
    reducer = np.min if use_min else np.mean
    return reducer(per_rep, axis=1)

# --- visualisation evaluation --- #

def view_digit(identifier, epoch, digit, n_samples=6):
    """
    Generate a bunch of MNIST digits from a CGAN, view them
    """
    cfg = json.load(open('./experiments/settings/' + identifier + '.txt', 'r'))
    if cfg['one_hot']:
        # one-hot conditioning: flip on the requested digit's column
        assert cfg['max_val'] == 1
        assert digit <= cfg['cond_dim']
        condition = np.zeros(shape=(n_samples, cfg['cond_dim']))
        condition[:, digit] = 1
    else:
        # scalar conditioning: repeat the digit value for every sample
        assert cfg['cond_dim'] == 1
        assert digit <= cfg['max_val']
        condition = np.array([digit]*n_samples).reshape(-1, 1)
    digit_samples = model.sample_trained_model(cfg, epoch, n_samples, Z_samples=None, cond_dim=cfg['cond_dim'], C_samples=condition)
    digit_samples = digit_samples.reshape(n_samples, -1, 1)
    # visualise
    plotting.save_mnist_plot_sample(digit_samples, digit, identifier + '_' + str(epoch) + '_digit_', n_samples)
    return True

def view_interpolation(identifier, epoch, n_steps=6, input_samples=None, e_tolerance=0.01, sigma=3.29286853021):
    """
    If samples: generate interpolation between real points
    Else:
        Sample two points in the latent space, view a linear interpolation between them.

    Inverts two samples into latent space, generates along the line between
    the two latent points, and plots the interpolation annotated with RBF
    distances to the endpoints.

    NOTE(review): the default sigma is a magic constant -- presumably a
    heuristic bandwidth from a past run; confirm before reuse.
    """
    settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r'))
    if input_samples is None:
        # grab two trainng examples
        data = np.load('./experiments/data/' + identifier + '.data.npy').item()
        train = data['samples']['train']
        input_samples = np.random.permutation(train)[:2]
#        Z_sampleA, Z_sampleB = model.sample_Z(2, settings['seq_length'], settings['latent_dim'], 
#                                          settings['use_time'])
        if sigma is None:
            ## gotta get a sigma somehow
            sigma = mmd.median_pairwise_distance(train)
            print('Calcualted heuristic sigma from training data:', sigma)
    # recover latent points for the two inputs, then interpolate between them
    Zs, error, _ = model.invert(settings, epoch, input_samples, e_tolerance=e_tolerance)
    Z_sampleA, Z_sampleB = Zs
    Z_samples = plotting.interpolate(Z_sampleA, Z_sampleB, n_steps=n_steps)
    samples = model.sample_trained_model(settings, epoch, Z_samples.shape[0], Z_samples)
    # get distances from generated samples to target samples
    # NOTE(review): distances are measured to the generated endpoints
    # (samples[0], samples[-1]), not to the original input_samples -- confirm
    d_A, d_B = [], []
    for sample in samples:
        d_A.append(sample_distance(sample, samples[0], sigma))
        d_B.append(sample_distance(sample, samples[-1], sigma))
    distances = pd.DataFrame({'dA': d_A, 'dB': d_B})
    plotting.save_plot_interpolate(input_samples, samples, epoch, settings['identifier'] + '_epoch' + str(epoch), distances=distances, sigma=sigma)
    return True

def view_latent_vary(identifier, epoch, n_steps=6):
    """Visualise the effect of varying each latent dimension of one sample."""
    settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r'))
    # single base latent trajectory to perturb
    base_Z = model.sample_Z(1, settings['seq_length'], settings['latent_dim'], 
                                      settings['use_time'])[0]
    per_dim_samples = []
    for dim in range(settings['latent_dim']):
        # sweep this dimension over n_steps values, generate from each
        varied_Z = plotting.vary_latent_dimension(base_Z, dim, n_steps)
        per_dim_samples.append(model.sample_trained_model(settings, epoch, varied_Z.shape[0], varied_Z))
    plotting.save_plot_vary_dimension(per_dim_samples, epoch, settings['identifier'] + '_varydim', n_dim=settings['latent_dim'])
    return True

def view_reconstruction(identifier, epoch, real_samples, tolerance=1):
    """
    Given a set of real samples, find the "closest" latent space points 
    corresponding to them, generate samples from these, visualise!
    """
    settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r'))
    Zs, error, sigma = model.invert(settings, epoch, real_samples, tolerance=tolerance)
    # show the first two recovered latent trajectories
    for idx in (0, 1):
        plotting.visualise_latent(Zs[idx], identifier + '_' + str(epoch) + '_' + str(idx))
    reconstructions = model.sample_trained_model(settings, epoch, Zs.shape[0], Zs)
    plotting.save_plot_reconstruct(real_samples, reconstructions, settings['identifier'])
    return True

def view_fixed(identifier, epoch, n_samples=6, dim=None):
    """ What happens when we give the same point at each time step? """
    settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r'))
    Z = model.sample_Z(n_samples, settings['seq_length'], settings['latent_dim'], 
                                      settings['use_time'])
    # copy the t=0 latent point (all dims, or one dim) into every later step
    if dim is None:
        for t in range(1, settings['seq_length']):
            Z[:, t, :] = Z[:, 0, :]
    else:
        for t in range(1, settings['seq_length']):
            Z[:, t, dim] = Z[:, 0, dim]
    # now generate
    fixed_samples = model.sample_trained_model(settings, epoch, n_samples, Z)
    # now visualise
    plotting.save_plot_sample(fixed_samples, epoch, identifier + '_fixed', n_samples)
    return True

def view_params(identifier, epoch):
    """ Visualise weight matrices in the GAN """
    tag = identifier + '_' + str(epoch)
    # settings are loaded (matching the sibling view_* functions) but unused
    settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r'))
    parameters = model.load_parameters(tag)
    plotting.plot_parameters(parameters, tag)
    return True

# --- to do with samples --- #

def sample_distance(sampleA, sampleB, sigma):
    """
    RBF-based dissimilarity: 1 - exp(-||A - B||^2 / (2 sigma^2)).
    (Not claiming this is the best distance measure, alright.)
    """
    gamma = 1 / (2 * sigma**2)
    sq_norm = np.linalg.norm(sampleA - sampleB)**2
    # similarity is 1 at identity, decaying to 0; distance is its complement
    return 1 - np.exp(-gamma*sq_norm)

### --- TSTR ---- ###

def train_CNN(train_X, train_Y, vali_X, vali_Y, test_X):
    """
    Train a CNN (code copied/adapted from Cristobal's mnist_keras_trts_0_2)
    (ONLY MNIST, ONLY 14x14)
    (ONLY DIGITS UP TO 3)

    Trains a small convolutional classifier with early stopping on the
    validation loss, then returns class-probability predictions for test_X.
    Inputs are expected as (n, 14, 14) arrays; a channel axis is added here.
    Labels are expected one-hot with num_classes columns.
    """
    print('Training CNN!')
    input_shape = (14,14,1)
    batch_size = 128
    num_classes = 3
    # upper bound only; early stopping below ends training much sooner
    epochs = 1000

    m = Sequential()
    m.add(Conv2D(16, kernel_size=(3, 3),
                        activation='relu',
                        input_shape=input_shape))
    m.add(Conv2D(32, (3, 3), activation='relu'))
    m.add(MaxPooling2D(pool_size=(2, 2)))
    m.add(Dropout(0.25))
    m.add(Flatten())
    m.add(Dense(128, activation='relu'))
    m.add(Dropout(0.5))
    m.add(Dense(num_classes, activation='softmax'))

    m.compile(loss=keras.losses.categorical_crossentropy,
            optimizer=keras.optimizers.Adadelta(),
            metrics=['accuracy'])

    # patience=0: stop as soon as val_loss fails to improve between epochs
    earlyStopping=keras.callbacks.EarlyStopping(monitor='val_loss', patience=0, verbose=1, mode='auto')
    m.fit(np.expand_dims(train_X, axis=-1), train_Y,
            batch_size=batch_size,
            epochs=epochs,
            verbose=1,
            validation_data=(np.expand_dims(vali_X, axis=-1), vali_Y),
            callbacks=[earlyStopping])
    # per-class probabilities, shape (n_test, num_classes)
    test_predictions = m.predict(np.expand_dims(test_X, axis=-1))
    return test_predictions

def TSTR_mnist(identifier, epoch, generate=True, duplicate_synth=1, vali=True, CNN=False, reverse=False):
    """
    Either load or generate synthetic training, real test data...
    Load synthetic training, real test data, do multi-class SVM
    (basically just this: http://scikit-learn.org/stable/auto_examples/classification/plot_digits_classification.html)

    If reverse = True: do TRTS

    Trains one classifier on synthetic data and one on real data, evaluates
    both on the same (real or synthetic) test set, and appends the scores to
    a CSV report. Returns (synth_f1, real_f1).
    """
    print('Running TSTR on', identifier, 'at epoch', epoch)
    if vali:
        test_set = 'vali'
    else:
        test_set = 'test'
    if generate:
        data = np.load('./experiments/data/' + identifier + '.data.npy').item()
        samples = data['samples']
        train_X = samples['train']
        test_X = samples[test_set]
        labels = data['labels']
        train_Y = labels['train']
        test_Y = labels[test_set]
        # now sample from the model, conditioning on (duplicated) train labels
        synth_Y = np.tile(train_Y, [duplicate_synth, 1])
        synth_X = model.sample_trained_model(identifier, epoch, num_samples=synth_Y.shape[0], C_samples=synth_Y)
        # for use in TRTS
        synth_testX = model.sample_trained_model(identifier, epoch, num_samples=test_Y.shape[0], C_samples=test_Y)
        synth_data = {'samples': synth_X, 'labels': synth_Y, 'test_samples': synth_testX, 'test_labels': test_Y}
        np.save('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy', synth_data)
    else:
        print('Loading synthetic data from pre-sampled model')
        # NOTE(review): these keys ('test_data', 'train_data', ...) do not
        # match what the generate branch above saves ('samples', 'labels',
        # 'test_samples', 'test_labels') -- confirm which format the file has
        exp_data = np.load('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy').item()
        test_X, test_Y = exp_data['test_data'], exp_data['test_labels']
        train_X, train_Y = exp_data['train_data'], exp_data['train_labels']
        synth_X, synth_Y = exp_data['synth_data'], exp_data['synth_labels']
    if reverse:
        which_setting = 'trts'
        print('Swapping synthetic test set in for real, to do TRTS!')
        # NOTE(review): synth_testX only exists when generate=True;
        # reverse=True with generate=False raises NameError here
        test_X = synth_testX
    else:
        print('Doing normal TSTR')
        which_setting = 'tstr'
    # make classifier
    if not CNN:
        model_choice = 'RF'
         # if multivariate, reshape
        if len(test_X.shape) == 3:
            test_X = test_X.reshape(test_X.shape[0], -1)
        if len(train_X.shape) == 3:
            train_X = train_X.reshape(train_X.shape[0], -1)
        if len(synth_X.shape) == 3:
            synth_X = synth_X.reshape(synth_X.shape[0], -1)
        # if one hot, fix
        if len(synth_Y.shape) > 1 and not synth_Y.shape[1] == 1:
            synth_Y = np.argmax(synth_Y, axis=1)
            train_Y = np.argmax(train_Y, axis=1)
            test_Y = np.argmax(test_Y, axis=1)
       # random forest
        #synth_classifier = SVC(gamma=0.001)
        #real_classifier = SVC(gamma=0.001)
        synth_classifier = RandomForestClassifier(n_estimators=500)
        real_classifier = RandomForestClassifier(n_estimators=500)
        # fit
        real_classifier.fit(train_X, train_Y)
        synth_classifier.fit(synth_X, synth_Y)
        # test on real
        synth_predY = synth_classifier.predict(test_X)
        real_predY = real_classifier.predict(test_X)
    else:
        model_choice = 'CNN'
        # NOTE(review): samples/labels only exist when generate=True;
        # CNN=True with generate=False raises NameError here
        synth_predY = train_CNN(synth_X, synth_Y, samples['vali'], labels['vali'], test_X)
        clear_session()
        real_predY = train_CNN(train_X, train_Y, samples['vali'], labels['vali'], test_X)
        clear_session()
        # CNN setting is all 'one-hot'
        test_Y = np.argmax(test_Y, axis=1)
        synth_predY = np.argmax(synth_predY, axis=1)
        real_predY = np.argmax(real_predY, axis=1)
    
    # report on results (AUPRC/AUROC are not computed in the MNIST setting)
    synth_prec, synth_recall, synth_f1, synth_support = precision_recall_fscore_support(test_Y, synth_predY, average='weighted')
    synth_accuracy = accuracy_score(test_Y, synth_predY)
    synth_auprc = 'NaN'
    synth_auroc = 'NaN'
    synth_scores = [synth_prec, synth_recall, synth_f1, synth_accuracy, synth_auprc, synth_auroc]
    real_prec, real_recall, real_f1, real_support = precision_recall_fscore_support(test_Y, real_predY, average='weighted')
    real_accuracy = accuracy_score(test_Y, real_predY)
    real_auprc = 'NaN'
    real_auroc = 'NaN'
    real_scores = [real_prec, real_recall, real_f1, real_accuracy, real_auprc, real_auroc]
    
    all_scores = synth_scores + real_scores

    # append one CSV row per run; separate report file for validation runs
    if vali:
        report_file = open('./experiments/tstr/vali.' + which_setting + '_report.v3.csv', 'a')
        report_file.write('mnist,' + identifier + ',' + model_choice + ',' + str(epoch) + ',' + ','.join(map(str, all_scores)) + '\n')
        report_file.close()
    else:
        report_file = open('./experiments/tstr/' + which_setting + '_report.v3.csv', 'a')
        report_file.write('mnist,' + identifier + ',' + model_choice + ',' + str(epoch) + ',' + ','.join(map(str, all_scores)) + '\n')
        report_file.close()
        # visualise results
        try:
            plotting.view_mnist_eval(identifier + '_' + str(epoch), train_X, train_Y, synth_X, synth_Y, test_X, test_Y, synth_predY, real_predY)
        except ValueError:
            print('PLOTTING ERROR')
            pdb.set_trace()
    print(classification_report(test_Y, synth_predY))
    print(classification_report(test_Y, real_predY))
    return synth_f1, real_f1

def TSTR_eICU(identifier, epoch, generate=True, vali=True, CNN=False, do_OR=False, duplicate_synth=1, reverse=False):
    """
    TSTR (train-on-synthetic, test-on-real) evaluation for the eICU tasks.

    For each label column, trains one random forest on synthetic data and one
    on real data, evaluates both on the same test set, and appends scores to
    a CSV report. Returns the mean of (synth AUPRC + synth AUROC) over the
    three validation tasks.
    """
    if vali:
        test_set = 'vali'
    else:
        test_set = 'test'
    data = np.load('./experiments/data/' + identifier + '.data.npy').item()
    samples = data['samples']
    train_X = samples['train']
    test_X = samples[test_set]
    labels = data['labels']
    train_Y = labels['train']
    test_Y = labels[test_set]
    if generate:
        # now sample from the model, conditioning on (duplicated) train labels
        synth_Y = np.tile(train_Y, [duplicate_synth, 1])
        synth_X = model.sample_trained_model(identifier, epoch, num_samples=synth_Y.shape[0], C_samples=synth_Y)
        # for use in TRTS
        synth_testX = model.sample_trained_model(identifier, epoch, num_samples=test_Y.shape[0], C_samples=test_Y)
        synth_data = {'samples': synth_X, 'labels': synth_Y, 'test_samples': synth_testX, 'test_labels': test_Y}
        np.save('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy', synth_data)
    else:
        print('Loading pre-generated data')
        print('WARNING: not implemented for TRTS')
        # get "train" data
        exp_data = np.load('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy').item()
        synth_X = exp_data['samples']
        synth_Y = exp_data['labels']
        n_synth = synth_X.shape[0]
        synth_X = synth_X.reshape(n_synth, -1)
    #    pdb.set_trace()
    #    # ALERT ALERT MODIFYING
    #    synth_X = 2*(synth_X > 0) - 1
    # NOTE(review): hard-coded cluster path; only runnable on that machine
    orig_data = np.load('/cluster/home/hyland/eICU_task_data.npy').item()
    if reverse:
        which_setting = 'trts'
        # NOTE(review): synth_testX only exists when generate=True;
        # reverse=True with generate=False raises NameError here
        print('Swapping synthetic test set in for real, to do TRTS!')
        test_X = synth_testX
    else:
        print('Doing normal TSTR')
        which_setting = 'tstr'
#    # get test data
#    test_X = data['test_X']
#    test_Y = data['test_Y']
    if not CNN:
        model_choice = 'RF'
         # if multivariate, reshape
        if len(test_X.shape) == 3:
            test_X = test_X.reshape(test_X.shape[0], -1)
        if len(train_X.shape) == 3:
            train_X = train_X.reshape(train_X.shape[0], -1)
        if len(synth_X.shape) == 3:
            synth_X = synth_X.reshape(synth_X.shape[0], -1)
    else:
        # CNN evaluation is not supported for eICU
        raise ValueError(CNN)
        model_choice = 'CNN'  # unreachable after the raise above
    # we will select the best validation set epoch based on F1 score, take average across all the tasks
    score_list = []
    for label in range(synth_Y.shape[1]):
        task = orig_data['Y_columns'][label]
        if vali:
            if not task in ['low_sao2', 'high_heartrate', 'low_respiration']:
                print('Skipping task', task, 'because validation evaluation.')
                continue
        print('Evaluating on task:', task)
        #print('(', np.mean(synth_Y[:, label]), 'positive in train, ', np.mean(test_Y[:, label]), 'in test)')
        #m = RandomForestClassifier(n_estimators=50).fit(synth_X, synth_Y[:, label])
        #m = SVC(gamma=0.001).fit(synth_X, synth_Y[:, label])
        # one binary classifier per task, for synthetic and for real data
        synth_classifier = RandomForestClassifier(n_estimators=100).fit(synth_X, synth_Y[:, label])
        synth_predY = synth_classifier.predict(test_X)
        synth_predY_prob = synth_classifier.predict_proba(test_X)[:, 1]
        real_classifier = RandomForestClassifier(n_estimators=100).fit(train_X, train_Y[:, label])
        real_predY = real_classifier.predict(test_X)
        real_predY_prob = real_classifier.predict_proba(test_X)[:, 1]
        #print('(predicted', np.mean(predict), 'positive labels)')
        
        synth_prec, synth_recall, synth_f1, synth_support = precision_recall_fscore_support(test_Y[:, label], synth_predY, average='weighted')
        synth_accuracy = accuracy_score(test_Y[:, label], synth_predY)
        synth_auprc = average_precision_score(test_Y[:, label], synth_predY_prob)
        synth_auroc = roc_auc_score(test_Y[:, label], synth_predY_prob)
        synth_scores = [synth_prec, synth_recall, synth_f1, synth_accuracy, synth_auprc, synth_auroc]

        real_prec, real_recall, real_f1, real_support = precision_recall_fscore_support(test_Y[:, label], real_predY, average='weighted')
        real_accuracy = accuracy_score(test_Y[:, label], real_predY)
        real_auprc = average_precision_score(test_Y[:, label], real_predY_prob)
        real_auroc = roc_auc_score(test_Y[:, label], real_predY_prob)
        real_scores = [real_prec, real_recall, real_f1, real_accuracy, real_auprc, real_auroc]
        
        all_scores = synth_scores + real_scores

        # append one CSV row per task; separate report file for validation runs
        if vali:
            report_file = open('./experiments/tstr/vali.' + which_setting + '_report.v3.csv', 'a')
            report_file.write('eICU_' + task + ',' + identifier + ',' + model_choice + ',' + str(epoch) + ',' + ','.join(map(str, all_scores)) + '\n')
            report_file.close()
        else:
            report_file = open('./experiments/tstr/' + which_setting + '_report.v3.csv', 'a')
            report_file.write('eICU_' + task + ',' + identifier + ',' + model_choice + ',' + str(epoch) + ',' + ','.join(map(str, all_scores)) + '\n')
            report_file.close()
        
        print(classification_report(test_Y[:, label], synth_predY))
        print(classification_report(test_Y[:, label], real_predY))
        # model-selection score: only the three validation tasks contribute
        if task in ['low_sao2', 'high_heartrate', 'low_respiration']:
            score_list.append(synth_auprc + synth_auroc)

    if do_OR:
        # NOTE(review): everything below the raise is dead code, and it
        # references an undefined name X_test (probably meant test_X)
        raise NotImplementedError
        # do the OR task
        extreme_heartrate_test = test_Y[:, 1] + test_Y[:, 4]
        extreme_respiration_test = test_Y[:, 2] + test_Y[:, 5]
        extreme_systemicmean_test = test_Y[:, 3] + test_Y[:, 6]
        Y_OR_test = np.vstack([extreme_heartrate_test, extreme_respiration_test, extreme_systemicmean_test]).T
        Y_OR_test = (Y_OR_test > 0)*1

        extreme_heartrate_synth = synth_Y[:, 1] + synth_Y[:, 4]
        extreme_respiration_synth = synth_Y[:, 2] + synth_Y[:, 5]
        extreme_systemicmean_synth = synth_Y[:, 3] + synth_Y[:, 6]
        Y_OR_synth = np.vstack([extreme_heartrate_synth, extreme_respiration_synth, extreme_systemicmean_synth]).T
        Y_OR_synth = (Y_OR_synth > 0)*1

        OR_names = ['extreme heartrate', 'extreme respiration', 'extreme MAP']
        OR_results = []
        for label in range(Y_OR_synth.shape[1]):
            print('task:', OR_names[label])
            print('(', np.mean(Y_OR_synth[:, label]), 'positive in train, ', np.mean(Y_OR_test[:, label]), 'in test)')
            m = RandomForestClassifier(n_estimators=500).fit(synth_X, Y_OR_synth[:, label])
            predict = m.predict(X_test)
            print('(predicted', np.mean(predict), 'positive labels)')
            accuracy = accuracy_score(Y_OR_test[:, label], predict)
            precision = sklearn.metrics.precision_score(Y_OR_test[:, label], predict)
            recall = sklearn.metrics.recall_score(Y_OR_test[:, label], predict)
            print(accuracy, precision, recall)
            OR_results.append([accuracy, precision, recall])
    else:
        OR_results = []

    score_across_tasks = np.mean(np.array(score_list))
    return score_across_tasks

def NIPS_toy_plot(identifier_rbf, epoch_rbf, identifier_sine, epoch_sine, identifier_mnist, epoch_mnist):
    """
    Plot real training examples next to generated samples for the NIPS toy
    experiments (rbf, sine, MNIST).

    Each sample is written to its own PDF (by the plotting helpers) so the
    figures can be merged externally (e.g. in Illustrator). Only the MNIST
    plots are currently produced; the rbf/sine experiments are still loaded
    and sampled (their plotting calls were disabled upstream).

    Args:
        identifier_rbf, identifier_sine, identifier_mnist: experiment
            identifiers used to locate settings/data/checkpoint files.
        epoch_rbf, epoch_sine, epoch_mnist: checkpoint epochs to sample from.

    Returns:
        True on completion.
    """
    n_samples = 15
    # settings: one JSON settings file per experiment; use context managers
    # so the file handles are closed promptly (the old json.load(open(...))
    # form leaked them)
    with open('./experiments/settings/' + identifier_rbf + '.txt', 'r') as f:
        settings_rbf = json.load(f)
    with open('./experiments/settings/' + identifier_sine + '.txt', 'r') as f:
        settings_sine = json.load(f)
    with open('./experiments/settings/' + identifier_mnist + '.txt', 'r') as f:
        settings_mnist = json.load(f)
    # data: the .data.npy files store a pickled dict, so allow_pickle=True is
    # required on NumPy >= 1.16.3 (and harmless on earlier versions)
    data_rbf = np.load('./experiments/data/' + identifier_rbf + '.data.npy', allow_pickle=True).item()
    data_sine = np.load('./experiments/data/' + identifier_sine + '.data.npy', allow_pickle=True).item()
    data_mnist = np.load('./experiments/data/' + identifier_mnist + '.data.npy', allow_pickle=True).item()
    train_mnist = data_mnist['samples']['train']
    # sample from the trained models (rbf/sine are sampled for parity with
    # the original behaviour even though their plots are disabled)
    samples_rbf = model.sample_trained_model(settings_rbf, epoch_rbf, n_samples)
    samples_sine = model.sample_trained_model(settings_sine, epoch_sine, n_samples)
    samples_mnist = model.sample_trained_model(settings_mnist, epoch_mnist, n_samples)
    # plot a random subset of real MNIST training sequences, then the
    # generated ones, sharing one running index so output names don't collide
    index = 0
    for sample in np.random.permutation(train_mnist)[:n_samples]:
        plotting.nips_plot_mnist(sample, index, 'train')
        index += 1
    for sample in samples_mnist:
        plotting.nips_plot_mnist(sample, index, 'GAN')
        index += 1
    return True


================================================
FILE: experiments/settings/kdd99.txt
================================================
{
"settings_file": "",
"data": "kdd99",
"seq_length": 30,
"num_signals": 6,
"normalise": false,
"scale": 0.1,
"freq_low": 1.0,
"freq_high": 5.0,
"amplitude_low": 0.1,
"amplitude_high": 0.9,
"multivariate_mnist": false,
"full_mnist": false,
"data_load_from": "",
"resample_rate_in_min": 15,
"hidden_units_g": 100,
"hidden_units_d": 100,
"hidden_units_e": 100,
"kappa": 1,
"latent_dim": 15,
"weight": 0.5,
"degree": 1,
"batch_mean": false,
"learn_scale": false,
"learning_rate": 0.1,
"batch_size": 500,
"num_epochs": 100,
"D_rounds": 1,
"G_rounds": 3,
"E_rounds": 1,
"shuffle": true,
"eval_mul": false,
"eval_an": false,
"eval_single": false,
"wrong_labels": false,
"identifier": "kdd99",
"sub_id": "kdd99",
"dp": false,
"l2norm_bound": 1e-05,
"batches_per_lot": 1,
"dp_sigma": 1e-05,
"use_time": false,
"seq_step": 10,
"num_generated_features": 6
}

================================================
FILE: experiments/settings/kdd99_test.txt
================================================
{
"settings_file": "",
"data": "kdd99_test",
"seq_length": 30,
"num_signals": 6,
"normalise": false,
"scale": 0.1,
"freq_low": 1.0,
"freq_high": 5.0,
"amplitude_low": 0.1,
"amplitude_high": 0.9,
"multivariate_mnist": false,
"full_mnist": false,
"data_load_from": "",
"resample_rate_in_min": 15,
"hidden_units_g": 100,
"hidden_units_d": 100,
"hidden_units_e": 100,
"kappa": 1,
"latent_dim": 15,
"weight": 0.5,
"degree": 1,
"batch_mean": false,
"learn_scale": false,
"learning_rate": 0.1,
"batch_size": 500,
"num_epochs": 100,
"D_rounds": 1,
"G_rounds": 3,
"E_rounds": 1,
"shuffle": true,
"eval_mul": false,
"eval_an": false,
"eval_single": false,
"wrong_labels": false,
"identifier": "kdd99_test",
"sub_id": "kdd99",
"dp": false,
"l2norm_bound": 1e-05,
"batches_per_lot": 1,
"dp_sigma": 1e-05,
"use_time": false,
"seq_step": 10,
"num_generated_features": 6
}

================================================
FILE: mmd.py
================================================
'''
MMD functions implemented in tensorflow.
(from https://github.com/dougalsutherland/opt-mmd/blob/master/gan/mmd.py)
'''
from __future__ import division

import tensorflow as tf

from tf_ops import dot, sq_sum

from scipy.spatial.distance import pdist
from numpy import median, vstack, einsum
import pdb
import numpy as np

_eps=1e-8

################################################################################
### Quadratic-time MMD with Gaussian RBF kernel

def _mix_rbf_kernel(X, Y, sigmas, wts=None):
    """
    Build mixed-bandwidth Gaussian RBF kernel matrices between X and Y.

    K_XX, K_XY and K_YY are each a weighted sum of RBF kernels, one term per
    bandwidth in `sigmas` (weight from `wts`, defaulting to 1.0 each).

    Args:
        X, Y: 2-D (n, d) tensors, or 3-D (n, t, d) tensors which are compared
            via the Frobenius inner product over their last two axes.
        sigmas: 1-D collection of RBF bandwidths.
            NOTE(review): when `wts` is None this calls `sigmas.get_shape()`,
            which assumes a tf tensor -- a plain Python list would fail here;
            confirm what callers pass.
        wts: optional per-bandwidth weights (default: all ones).

    Returns:
        (K_XX, K_XY, K_YY, tf.reduce_sum(wts)); the last value is the constant
        diagonal of the kernel matrices, used by the MMD estimators.
    """
    if wts is None:
        wts = [1.0] * sigmas.get_shape()[0]

    # pick the inner product by rank: plain matmul for matrices, tensordot
    # over (time, features) for sequence tensors
    if len(X.shape) == 2:
        # matrix
        XX = tf.matmul(X, X, transpose_b=True)
        XY = tf.matmul(X, Y, transpose_b=True)
        YY = tf.matmul(Y, Y, transpose_b=True)
    elif len(X.shape) == 3:
        # tensor -- this is computing the Frobenius norm
        XX = tf.tensordot(X, X, axes=[[1, 2], [1, 2]])
        XY = tf.tensordot(X, Y, axes=[[1, 2], [1, 2]])
        YY = tf.tensordot(Y, Y, axes=[[1, 2], [1, 2]])
    else:
        raise ValueError(X)

    X_sqnorms = tf.diag_part(XX)
    Y_sqnorms = tf.diag_part(YY)

    # row / column broadcasting helpers for ||x||^2 + ||y||^2 - 2<x,y>
    r = lambda x: tf.expand_dims(x, 0)
    c = lambda x: tf.expand_dims(x, 1)

    K_XX, K_XY, K_YY = 0, 0, 0
    for sigma, wt in zip(tf.unstack(sigmas, axis=0), wts):
        gamma = 1 / (2 * sigma**2)
        K_XX += wt * tf.exp(-gamma * (-2 * XX + c(X_sqnorms) + r(X_sqnorms)))
        K_XY += wt * tf.exp(-gamma * (-2 * XY + c(X_sqnorms) + r(Y_sqnorms)))
        K_YY += wt * tf.exp(-gamma * (-2 * YY + c(Y_sqnorms) + r(Y_sqnorms)))

    return K_XX, K_XY, K_YY, tf.reduce_sum(wts)


def rbf_mmd2(X, Y, sigma=1, biased=True):
    """Squared MMD between X and Y under a single-bandwidth RBF kernel."""
    bandwidths = [sigma]
    return mix_rbf_mmd2(X, Y, sigmas=bandwidths, biased=biased)


def mix_rbf_mmd2(X, Y, sigmas=(1,), wts=None, biased=True):
    """Squared MMD under a mixture of RBF kernels with bandwidths `sigmas`."""
    K_XX, K_XY, K_YY, const_diag = _mix_rbf_kernel(X, Y, sigmas, wts)
    return _mmd2(K_XX, K_XY, K_YY, const_diagonal=const_diag, biased=biased)


def rbf_mmd2_and_ratio(X, Y, sigma=1, biased=True):
    """MMD^2 and its variance-normalised ratio for one RBF bandwidth."""
    bandwidths = [sigma]
    return mix_rbf_mmd2_and_ratio(X, Y, sigmas=bandwidths, biased=biased)


def mix_rbf_mmd2_and_ratio(X, Y, sigmas=(1,), wts=None, biased=True):
    """MMD^2 and its ratio statistic under a mixture of RBF kernels."""
    K_XX, K_XY, K_YY, sum_wts = _mix_rbf_kernel(X, Y, sigmas, wts)
    return _mmd2_and_ratio(K_XX, K_XY, K_YY, const_diagonal=sum_wts,
                           biased=biased)


################################################################################
### Helper functions to compute variances based on kernel matrices


def _mmd2(K_XX, K_XY, K_YY, const_diagonal=False, biased=False):
    """
    Estimate MMD^2 from precomputed kernel matrices.

    Args:
        K_XX: (m, m) kernel matrix of X with itself.
        K_XY: (m, n) cross kernel matrix.
        K_YY: (n, n) kernel matrix of Y with itself.
        const_diagonal: if not False, the constant diagonal value of
            K_XX / K_YY (saves computing the traces explicitly).
        biased: if True use the biased V-statistic (diagonals kept),
            otherwise the unbiased U-statistic (diagonals removed).

    Returns:
        Scalar tensor holding the MMD^2 estimate.
    """
    m = tf.cast(K_XX.get_shape()[0], tf.float32)
    n = tf.cast(K_YY.get_shape()[0], tf.float32)

    sum_XX = tf.reduce_sum(K_XX)
    sum_YY = tf.reduce_sum(K_YY)
    sum_XY = tf.reduce_sum(K_XY)

    if biased:
        # V-statistic: average over all pairs, including self-pairs
        return sum_XX / (m * m) + sum_YY / (n * n) - 2 * sum_XY / (m * n)

    # U-statistic: drop the diagonal (self-pair) contributions
    if const_diagonal is not False:
        trace_X = m * const_diagonal
        trace_Y = n * const_diagonal
    else:
        trace_X = tf.trace(K_XX)
        trace_Y = tf.trace(K_YY)

    mmd2 = ((sum_XX - trace_X) / (m * (m - 1))
          + (sum_YY - trace_Y) / (n * (n - 1))
          - 2 * sum_XY / (m * n))
    return mmd2


def _mmd2_and_ratio(K_XX, K_XY, K_YY, const_diagonal=False, biased=False,
                    min_var_est=_eps):
    """MMD^2 estimate and its variance-normalised (t-statistic-like) ratio."""
    mmd2_est, variance = _mmd2_and_variance(
        K_XX, K_XY, K_YY, const_diagonal=const_diagonal, biased=biased)
    # floor the variance so the ratio stays finite
    denom = tf.sqrt(tf.maximum(variance, min_var_est))
    return mmd2_est, mmd2_est / denom


def _mmd2_and_variance(K_XX, K_XY, K_YY, const_diagonal=False, biased=False):
    """
    Estimate MMD^2 together with an estimate of its variance.

    This is the estimator pair from the opt-mmd codebase referenced in the
    module docstring; it assumes X and Y contain the same number m of samples
    (only K_XX's size is read).

    Args:
        K_XX, K_XY, K_YY: (m, m) kernel matrices.
        const_diagonal: if not False, the constant value on the diagonals of
            K_XX / K_YY (avoids computing them explicitly).
        biased: use the biased V-statistic instead of the unbiased U-statistic
            for the MMD^2 term.

    Returns:
        (mmd2, var_est) scalar tensors.
    """
    m = tf.cast(K_XX.get_shape()[0], tf.float32)  # Assumes X, Y are same shape

    ### Get the various sums of kernels that we'll use
    # Kts drop the diagonal, but we don't need to compute them explicitly
    if const_diagonal is not False:
        const_diagonal = tf.cast(const_diagonal, tf.float32)
        diag_X = diag_Y = const_diagonal
        sum_diag_X = sum_diag_Y = m * const_diagonal
        sum_diag2_X = sum_diag2_Y = m * const_diagonal**2
    else:
        diag_X = tf.diag_part(K_XX)
        diag_Y = tf.diag_part(K_YY)

        sum_diag_X = tf.reduce_sum(diag_X)
        sum_diag_Y = tf.reduce_sum(diag_Y)

        sum_diag2_X = sq_sum(diag_X)
        sum_diag2_Y = sq_sum(diag_Y)

    # row sums with the diagonal removed ("Kt" = K-tilde)
    Kt_XX_sums = tf.reduce_sum(K_XX, 1) - diag_X
    Kt_YY_sums = tf.reduce_sum(K_YY, 1) - diag_Y
    K_XY_sums_0 = tf.reduce_sum(K_XY, 0)
    K_XY_sums_1 = tf.reduce_sum(K_XY, 1)

    Kt_XX_sum = tf.reduce_sum(Kt_XX_sums)
    Kt_YY_sum = tf.reduce_sum(Kt_YY_sums)
    K_XY_sum = tf.reduce_sum(K_XY_sums_0)

    # sums of squared entries, again excluding the diagonal
    Kt_XX_2_sum = sq_sum(K_XX) - sum_diag2_X
    Kt_YY_2_sum = sq_sum(K_YY) - sum_diag2_Y
    K_XY_2_sum  = sq_sum(K_XY)

    if biased:
        mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * m)
              + (Kt_YY_sum + sum_diag_Y) / (m * m)
              - 2 * K_XY_sum / (m * m))
    else:
        mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * (m-1))
              + (Kt_YY_sum + sum_diag_Y) / (m * (m-1))
              - 2 * K_XY_sum / (m * m))

    # variance estimate for the MMD^2 statistic, assembled from the kernel
    # sums above (see the opt-mmd reference for the derivation)
    var_est = (
          2 / (m**2 * (m-1)**2) * (
              2 * sq_sum(Kt_XX_sums) - Kt_XX_2_sum
            + 2 * sq_sum(Kt_YY_sums) - Kt_YY_2_sum)
        - (4*m-6) / (m**3 * (m-1)**3) * (Kt_XX_sum**2 + Kt_YY_sum**2)
        + 4*(m-2) / (m**3 * (m-1)**2) * (
              sq_sum(K_XY_sums_1) + sq_sum(K_XY_sums_0))
        - 4 * (m-3) / (m**3 * (m-1)**2) * K_XY_2_sum
        - (8*m - 12) / (m**5 * (m-1)) * K_XY_sum**2
        + 8 / (m**3 * (m-1)) * (
              1/m * (Kt_XX_sum + Kt_YY_sum) * K_XY_sum
            - dot(Kt_XX_sums, K_XY_sums_1)
            - dot(Kt_YY_sums, K_XY_sums_0))
    )

    return mmd2, var_est


### additions from stephanie, for convenience

def median_pairwise_distance(X, Y=None):
    """
    Median pairwise distance between rows of X and Y -- the usual heuristic
    for choosing an RBF kernel bandwidth.

    If Y is omitted, X is compared against itself (this keeps the zero
    self-distances, matching training-time usage where the bandwidth is
    fixed from X alone). Inputs are plain ndarrays of rank 2 (n, d) or
    rank 3 (n, t, d); rank-3 inputs use the Frobenius inner product.
    """
    if Y is None:
        Y = X       # compare X against itself

    rank = len(X.shape)
    if rank == 2:
        sq_X = einsum('...i,...i', X, X)
        sq_Y = einsum('...i,...i', Y, Y)
        cross = einsum('ia,ja', X, Y)
    elif rank == 3:
        # collapse (time, features) with a Frobenius inner product
        sq_X = einsum('...ij,...ij', X, X)
        sq_Y = einsum('...ij,...ij', Y, Y)
        cross = einsum('iab,jab', X, Y)
    else:
        raise ValueError(X)

    # ||x - y||^2 = ||x||^2 - 2<x,y> + ||y||^2, broadcast over all pairs
    sq_dists = sq_X.reshape(-1, 1) - 2 * cross + sq_Y.reshape(1, -1)
    return median(np.sqrt(sq_dists))


def median_pairwise_distance_o(X, Y=None):
    """
    Median pairwise distance between rows of X and Y (RBF bandwidth
    heuristic), like median_pairwise_distance but robust to NaNs.

    Floating-point error can make a squared distance slightly negative, so
    the sqrt can produce NaNs; those entries are dropped before taking the
    median. If Y is omitted, X is compared against itself. Inputs are
    ndarrays of rank 2 (n, d) or rank 3 (n, t, d).
    """
    if Y is None:
        Y = X  # compare X against itself

    rank = len(X.shape)
    if rank == 2:
        sq_X = np.einsum('...i,...i', X, X)
        sq_Y = np.einsum('...i,...i', Y, Y)
        cross = np.einsum('ia,ja', X, Y)
    elif rank == 3:
        # rank-3: Frobenius inner product over (time, features)
        sq_X = np.einsum('...ij,...ij', X, X)
        sq_Y = np.einsum('...ij,...ij', Y, Y)
        cross = np.einsum('iab,jab', X, Y)
    else:
        raise ValueError(X)

    # ||x - y||^2 = ||x||^2 - 2<x,y> + ||y||^2, broadcast over all pairs
    dists = np.sqrt(sq_X.reshape(-1, 1) - 2 * cross + sq_Y.reshape(1, -1))
    flat = dists.reshape(-1, 1)
    valid = flat[~np.isnan(flat)]  # drop NaNs from negative float error
    return np.median(valid)

================================================
FILE: mod_core_rnn_cell_impl.py
================================================
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#modified by Stephanie (@corcra) to enable initializing the bias term in lstm """
# ==============================================================================

"""Module implementing RNN Cells."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import contextlib
import hashlib
import math
import numbers

from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import clip_ops
from tensorflow.python.ops import embedding_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import partitioned_variables
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import variable_scope as vs

from tensorflow.python.ops.math_ops import sigmoid
from tensorflow.python.ops.math_ops import tanh
#from tensorflow.python.ops.rnn_cell_impl import _RNNCell as RNNCell
from tensorflow.python.ops.rnn_cell_impl import RNNCell

from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util import nest


_BIAS_VARIABLE_NAME = "biases"
_WEIGHTS_VARIABLE_NAME = "weights"


@contextlib.contextmanager
def _checked_scope(cell, scope, reuse=None, **kwargs):
  """Yield a variable scope for `cell`, guarding against accidental sharing.

  Raises ValueError when the same cell instance is reused under a different
  variable scope, or when a fresh cell would silently pick up weights that
  already exist in the scope without `reuse=True`. On success, records the
  scope on the cell (as `cell._scope`) and yields it.
  """
  if reuse is not None:
    kwargs["reuse"] = reuse
  with vs.variable_scope(scope, **kwargs) as checking_scope:
    scope_name = checking_scope.name
    if hasattr(cell, "_scope"):
      # the cell has been used before: its recorded scope must match
      cell_scope = cell._scope  # pylint: disable=protected-access
      if cell_scope.name != checking_scope.name:
        raise ValueError(
            "Attempt to reuse RNNCell %s with a different variable scope than "
            "its first use.  First use of cell was with scope '%s', this "
            "attempt is with scope '%s'.  Please create a new instance of the "
            "cell if you would like it to use a different set of weights.  "
            "If before you were using: MultiRNNCell([%s(...)] * num_layers), "
            "change to: MultiRNNCell([%s(...) for _ in range(num_layers)]).  "
            "If before you were using the same cell instance as both the "
            "forward and reverse cell of a bidirectional RNN, simply create "
            "two instances (one for forward, one for reverse).  "
            "In May 2017, we will start transitioning this cell's behavior "
            "to use existing stored weights, if any, when it is called "
            "with scope=None (which can lead to silent model degradation, so "
            "this error will remain until then.)"
            % (cell, cell_scope.name, scope_name, type(cell).__name__,
               type(cell).__name__))
    else:
      # first use of this cell: make sure the scope doesn't already own
      # weights unless reuse was explicitly requested
      weights_found = False
      try:
        with vs.variable_scope(checking_scope, reuse=True):
          vs.get_variable(_WEIGHTS_VARIABLE_NAME)
        weights_found = True
      except ValueError:
        pass
      if weights_found and reuse is None:
        raise ValueError(
            "Attempt to have a second RNNCell use the weights of a variable "
            "scope that already has weights: '%s'; and the cell was not "
            "constructed as %s(..., reuse=True).  "
            "To share the weights of an RNNCell, simply "
            "reuse it in your second calculation, or create a new one with "
            "the argument reuse=True." % (scope_name, type(cell).__name__))

    # Everything is OK.  Update the cell's scope and yield it.
    cell._scope = checking_scope  # pylint: disable=protected-access
    yield checking_scope


class BasicRNNCell(RNNCell):
  """The most basic RNN cell: output = new_state = act(W * input + U * state + B)."""

  def __init__(self, num_units, input_size=None, activation=tanh, reuse=None):
    """Initialize the cell.

    Args:
      num_units: int, number of units (output and state width).
      input_size: deprecated and unused.
      activation: activation applied to the linear map (default: tanh).
      reuse: (optional) Python boolean describing whether to reuse variables
        in an existing scope.
    """
    if input_size is not None:
      logging.warn("%s: The input_size parameter is deprecated.", self)
    self._num_units = num_units
    self._activation = activation
    self._reuse = reuse

  @property
  def state_size(self):
    # the state is a single vector of num_units
    return self._num_units

  @property
  def output_size(self):
    return self._num_units

  def __call__(self, inputs, state, scope=None):
    """Most basic RNN: output = new_state = act(W * input + U * state + B)."""
    with _checked_scope(self, scope or "basic_rnn_cell", reuse=self._reuse):
      # one affine map over [inputs, state]; the activation output doubles
      # as the next state
      output = self._activation(
          _linear([inputs, state], self._num_units, True))
    return output, output


class GRUCell(RNNCell):
  """Gated Recurrent Unit cell (cf. http://arxiv.org/abs/1406.1078)."""

  def __init__(self, num_units, input_size=None, activation=tanh, reuse=None):
    """Initialize the GRU cell.

    Args:
      num_units: int, number of units (output and state width).
      input_size: deprecated and unused.
      activation: activation for the candidate state (default: tanh).
      reuse: (optional) Python boolean describing whether to reuse variables
        in an existing scope.
    """
    if input_size is not None:
      logging.warn("%s: The input_size parameter is deprecated.", self)
    self._num_units = num_units
    self._activation = activation
    self._reuse = reuse

  @property
  def state_size(self):
    return self._num_units

  @property
  def output_size(self):
    return self._num_units

  def __call__(self, inputs, state, scope=None):
    """Gated recurrent unit (GRU) with nunits cells."""
    with _checked_scope(self, scope or "gru_cell", reuse=self._reuse):
      with vs.variable_scope("gates"):  # Reset gate and update gate.
        # We start with bias of 1.0 to not reset and not update.
        value = sigmoid(_linear(
          [inputs, state], 2 * self._num_units, True, 1.0))
        r, u = array_ops.split(
            value=value,
            num_or_size_splits=2,
            axis=1)
      with vs.variable_scope("candidate"):
        # candidate state sees the reset-gated previous state
        c = self._activation(_linear([inputs, r * state],
                                     self._num_units, True))
      # the update gate u interpolates between old state and candidate
      new_h = u * state + (1 - u) * c
    return new_h, new_h


# Plain namedtuple backing LSTMStateTuple; subclassed below to add dtype
# consistency checking.
_LSTMStateTuple = collections.namedtuple("LSTMStateTuple", ("c", "h"))


class LSTMStateTuple(_LSTMStateTuple):
  """Tuple used by LSTM Cells for `state_size`, `zero_state`, and output state.

  Stores two elements: `(c, h)`, in that order.

  Only used when `state_is_tuple=True`.
  """
  # no per-instance __dict__: these tuples are created at every step
  __slots__ = ()

  @property
  def dtype(self):
    # both halves of the state must agree on dtype
    (c, h) = self
    if not c.dtype == h.dtype:
      raise TypeError("Inconsistent internal state: %s vs %s" %
                      (str(c.dtype), str(h.dtype)))
    return c.dtype


class BasicLSTMCell(RNNCell):
  """Basic LSTM recurrent network cell.

  The implementation is based on: http://arxiv.org/abs/1409.2329.

  We add forget_bias (default: 1) to the biases of the forget gate in order to
  reduce the scale of forgetting in the beginning of the training.

  It does not allow cell clipping, a projection layer, and does not
  use peep-hole connections: it is the basic baseline.

  For advanced models, please use the full LSTMCell that follows.
  """

  def __init__(self, num_units, forget_bias=1.0, input_size=None,
               state_is_tuple=True, activation=tanh, reuse=None):
    """Initialize the basic LSTM cell.

    Args:
      num_units: int, The number of units in the LSTM cell.
      forget_bias: float, The bias added to forget gates (see above).
      input_size: Deprecated and unused.
      state_is_tuple: If True, accepted and returned states are 2-tuples of
        the `c_state` and `m_state`.  If False, they are concatenated
        along the column axis.  The latter behavior will soon be deprecated.
      activation: Activation function of the inner states.
      reuse: (optional) Python boolean describing whether to reuse variables
        in an existing scope.  If not `True`, and the existing scope already has
        the given variables, an error is raised.
    """
    if not state_is_tuple:
      logging.warn("%s: Using a concatenated state is slower and will soon be "
                   "deprecated.  Use state_is_tuple=True.", self)
    if input_size is not None:
      logging.warn("%s: The input_size parameter is deprecated.", self)
    self._num_units = num_units
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation
    self._reuse = reuse

  @property
  def state_size(self):
    # (c, h) tuple, or 2*num_units when using the legacy concatenated state
    return (LSTMStateTuple(self._num_units, self._num_units)
            if self._state_is_tuple else 2 * self._num_units)

  @property
  def output_size(self):
    return self._num_units

  def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM)."""
    with _checked_scope(self, scope or "basic_lstm_cell", reuse=self._reuse):
      # Parameters of gates are concatenated into one multiply for efficiency.
      if self._state_is_tuple:
        c, h = state
      else:
        # legacy concatenated state: first half is c, second half is h
        c, h = array_ops.split(value=state, num_or_size_splits=2, axis=1)
      concat = _linear([inputs, h], 4 * self._num_units, True)

      # i = input_gate, j = new_input, f = forget_gate, o = output_gate
      i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4, axis=1)

      # forget_bias shifts f so the cell initially tends to retain its state
      new_c = (c * sigmoid(f + self._forget_bias) + sigmoid(i) *
               self._activation(j))
      new_h = self._activation(new_c) * sigmoid(o)

      if self._state_is_tuple:
        new_state = LSTMStateTuple(new_c, new_h)
      else:
        new_state = array_ops.concat([new_c, new_h], 1)
      return new_h, new_state


class LSTMCell(RNNCell):
  """Long short-term memory unit (LSTM) recurrent network cell.

  The default non-peephole implementation is based on:

    http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf

  S. Hochreiter and J. Schmidhuber.
  "Long Short-Term Memory". Neural Computation, 9(8):1735-1780, 1997.

  The peephole implementation is based on:

    https://research.google.com/pubs/archive/43905.pdf

  Hasim Sak, Andrew Senior, and Francoise Beaufays.
  "Long short-term memory recurrent neural network architectures for
   large scale acoustic modeling." INTERSPEECH, 2014.

  The class uses optional peep-hole connections, optional cell clipping, and
  an optional projection layer.

  Local modification (see module header): a `bias_start` argument was added
  so the initial value of the gate biases can be controlled.
  """

  def __init__(self, num_units, input_size=None,
               use_peepholes=False, cell_clip=None,
               initializer=None, bias_start=0.0, num_proj=None, proj_clip=None,
               num_unit_shards=None, num_proj_shards=None,
               forget_bias=1.0, state_is_tuple=True,
               activation=tanh, reuse=None):
    """Initialize the parameters for an LSTM cell.

    Args:
      num_units: int, The number of units in the LSTM cell
      input_size: Deprecated and unused.
      use_peepholes: bool, set True to enable diagonal/peephole connections.
      cell_clip: (optional) A float value, if provided the cell state is clipped
        by this value prior to the cell output activation.
      initializer: (optional) The initializer to use for the weight and
        projection matrices.
      bias_start: (optional) The VALUE to initialize the bias to, in
        the linear call
      num_proj: (optional) int, The output dimensionality for the projection
        matrices.  If None, no projection is performed.
      proj_clip: (optional) A float value.  If `num_proj > 0` and `proj_clip` is
        provided, then the projected values are clipped elementwise to within
        `[-proj_clip, proj_clip]`.
      num_unit_shards: Deprecated, will be removed by Jan. 2017.
        Use a variable_scope partitioner instead.
      num_proj_shards: Deprecated, will be removed by Jan. 2017.
        Use a variable_scope partitioner instead.
      forget_bias: Biases of the forget gate are initialized by default to 1
        in order to reduce the scale of forgetting at the beginning of
        the training.
      state_is_tuple: If True, accepted and returned states are 2-tuples of
        the `c_state` and `m_state`.  If False, they are concatenated
        along the column axis.  This latter behavior will soon be deprecated.
      activation: Activation function of the inner states.
      reuse: (optional) Python boolean describing whether to reuse variables
        in an existing scope.  If not `True`, and the existing scope already has
        the given variables, an error is raised.
    """
    if not state_is_tuple:
      logging.warn("%s: Using a concatenated state is slower and will soon be "
                   "deprecated.  Use state_is_tuple=True.", self)
    if input_size is not None:
      logging.warn("%s: The input_size parameter is deprecated.", self)
    if num_unit_shards is not None or num_proj_shards is not None:
      logging.warn(
          "%s: The num_unit_shards and proj_unit_shards parameters are "
          "deprecated and will be removed in Jan 2017.  "
          "Use a variable scope with a partitioner instead.", self)

    self._num_units = num_units
    self._use_peepholes = use_peepholes
    self._cell_clip = cell_clip
    self._initializer = initializer
    self._bias_start = bias_start
    self._num_proj = num_proj
    self._proj_clip = proj_clip
    self._num_unit_shards = num_unit_shards
    self._num_proj_shards = num_proj_shards
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation
    self._reuse = reuse

    # state/output sizes depend on whether a projection layer is configured
    if num_proj:
      self._state_size = (
          LSTMStateTuple(num_units, num_proj)
          if state_is_tuple else num_units + num_proj)
      self._output_size = num_proj
    else:
      self._state_size = (
          LSTMStateTuple(num_units, num_units)
          if state_is_tuple else 2 * num_units)
      self._output_size = num_units

  @property
  def state_size(self):
    return self._state_size

  @property
  def output_size(self):
    return self._output_size

  def __call__(self, inputs, state, scope=None):
    """Run one step of LSTM.

    Args:
      inputs: input Tensor, 2D, batch x num_units.
      state: if `state_is_tuple` is False, this must be a state Tensor,
        `2-D, batch x state_size`.  If `state_is_tuple` is True, this must be a
        tuple of state Tensors, both `2-D`, with column sizes `c_state` and
        `m_state`.
      scope: VariableScope for the created subgraph; defaults to "lstm_cell".

    Returns:
      A tuple containing:

      - A `2-D, [batch x output_dim]`, Tensor representing the output of the
        LSTM after reading `inputs` when previous state was `state`.
        Here output_dim is:
           num_proj if num_proj was set,
           num_units otherwise.
      - Tensor(s) representing the new state of LSTM after reading `inputs` when
        the previous state was `state`.  Same type and shape(s) as `state`.

    Raises:
      ValueError: If input size cannot be inferred from inputs via
        static shape inference.
    """
    # effective output width: projection size if configured, else num_units
    num_proj = self._num_units if self._num_proj is None else self._num_proj

    if self._state_is_tuple:
      (c_prev, m_prev) = state
    else:
      # legacy concatenated state: slice out c then m
      c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
      m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj])

    dtype = inputs.dtype
    input_size = inputs.get_shape().with_rank(2)[1]
    if input_size.value is None:
      raise ValueError("Could not infer input size from inputs.get_shape()[-1]")
    with _checked_scope(self, scope or "lstm_cell",
                        initializer=self._initializer,
                        reuse=self._reuse) as unit_scope:
      if self._num_unit_shards is not None:
        unit_scope.set_partitioner(
            partitioned_variables.fixed_size_partitioner(
                self._num_unit_shards))
      # i = input_gate, j = new_input, f = forget_gate, o = output_gate
      # bias_start is the local modification: it sets the initial bias value
      lstm_matrix = _linear([inputs, m_prev], 4 * self._num_units, bias=True, bias_start=self._bias_start)
      i, j, f, o = array_ops.split(
          value=lstm_matrix, num_or_size_splits=4, axis=1)
      # Diagonal connections
      if self._use_peepholes:
        with vs.variable_scope(unit_scope) as projection_scope:
          if self._num_unit_shards is not None:
            projection_scope.set_partitioner(None)
          w_f_diag = vs.get_variable(
              "w_f_diag", shape=[self._num_units], dtype=dtype)
          w_i_diag = vs.get_variable(
              "w_i_diag", shape=[self._num_units], dtype=dtype)
          w_o_diag = vs.get_variable(
              "w_o_diag", shape=[self._num_units], dtype=dtype)

      if self._use_peepholes:
        c = (sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev +
             sigmoid(i + w_i_diag * c_prev) * self._activation(j))
      else:
        c = (sigmoid(f + self._forget_bias) * c_prev + sigmoid(i) *
             self._activation(j))

      if self._cell_clip is not None:
        # pylint: disable=invalid-unary-operand-type
        c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
        # pylint: enable=invalid-unary-operand-type
      if self._use_peepholes:
        m = sigmoid(o + w_o_diag * c) * self._activation(c)
      else:
        m = sigmoid(o) * self._activation(c)

      if self._num_proj is not None:
        with vs.variable_scope("projection") as proj_scope:
          if self._num_proj_shards is not None:
            proj_scope.set_partitioner(
                partitioned_variables.fixed_size_partitioner(
                    self._num_proj_shards))
          m = _linear(m, self._num_proj, bias=False)

        if self._proj_clip is not None:
          # pylint: disable=invalid-unary-operand-type
          m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip)
          # pylint: enable=invalid-unary-operand-type

    new_state = (LSTMStateTuple(c, m) if self._state_is_tuple else
                 array_ops.concat([c, m], 1))
    return m, new_state


class OutputProjectionWrapper(RNNCell):
  """Operator adding an output projection to the given cell.

  Note: in many cases it may be more efficient to not use this wrapper,
  but instead concatenate the whole sequence of your outputs in time,
  do the projection on this batch-concatenated sequence, then split it
  if needed or directly feed into a softmax.
  """

  def __init__(self, cell, output_size, reuse=None):
    """Create a cell with output projection.

    Args:
      cell: an RNNCell, a projection to output_size is added to it.
      output_size: integer, the size of the output after projection.
      reuse: (optional) Python boolean describing whether to reuse variables
        in an existing scope.  If not `True`, and the existing scope already has
        the given variables, an error is raised.

    Raises:
      TypeError: if cell is not an RNNCell.
      ValueError: if output_size is not positive.
    """
    if not isinstance(cell, RNNCell):
      raise TypeError("The parameter cell is not RNNCell.")
    if output_size < 1:
      raise ValueError("Parameter output_size must be > 0: %d." % output_size)
    self._cell = cell
    self._output_size = output_size
    self._reuse = reuse

  @property
  def state_size(self):
    # state shape is unchanged; only the output is projected
    return self._cell.state_size

  @property
  def output_size(self):
    return self._output_size

  def zero_state(self, batch_size, dtype):
    # delegate to the wrapped cell
    with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]):
      return self._cell.zero_state(batch_size, dtype)

  def __call__(self, inputs, state, scope=None):
    """Run the cell and output projection on inputs, starting from state."""
    output, res_state = self._cell(inputs, state)
    # Default scope: "OutputProjectionWrapper"
    with _checked_scope(self, scope or "output_projection_wrapper",
                        reuse=self._reuse):
      projected = _linear(output, self._output_size, True)
    return projected, res_state


class InputProjectionWrapper(RNNCell):
  """RNNCell wrapper that linearly projects inputs before the cell sees them.

  Note: it is often cheaper to skip this wrapper and instead project the
  time-concatenated sequence of inputs in one batched matmul, splitting the
  result afterwards if needed.
  """

  def __init__(self, cell, num_proj, input_size=None):
    """Wrap `cell` so its inputs are first projected to `num_proj` units.

    Args:
      cell: an RNNCell; the projection is applied in front of it.
      num_proj: Python integer, the dimension to project the inputs to.
      input_size: Deprecated and unused.

    Raises:
      TypeError: if cell is not an RNNCell.
    """
    if input_size is not None:
      logging.warn("%s: The input_size parameter is deprecated.", self)
    if not isinstance(cell, RNNCell):
      raise TypeError("The parameter cell is not RNNCell.")
    self._cell = cell
    self._num_proj = num_proj

  @property
  def state_size(self):
    # State is untouched by the input projection; defer to the wrapped cell.
    return self._cell.state_size

  @property
  def output_size(self):
    # Output shape is unchanged; only the input is reshaped by projection.
    return self._cell.output_size

  def zero_state(self, batch_size, dtype):
    with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]):
      return self._cell.zero_state(batch_size, dtype)

  def __call__(self, inputs, state, scope=None):
    """Project the inputs, then run the wrapped cell on the projection."""
    # Default scope: "InputProjectionWrapper"
    proj_scope = scope or "input_projection_wrapper"
    with vs.variable_scope(proj_scope):
      projected_inputs = _linear(inputs, self._num_proj, True)
    return self._cell(projected_inputs, state)


def _enumerated_map_structure(map_fn, *args, **kwargs):
  """Like `nest.map_structure`, but also passes a running index to `map_fn`.

  `map_fn` is invoked as `map_fn(i, *leaves)`, where `i` counts the leaves
  in the order `nest.map_structure` visits them.
  """
  counter = [0]  # single-element list: closures can't rebind outer locals in py2
  def indexed_fn(*fn_args, **fn_kwargs):
    result = map_fn(counter[0], *fn_args, **fn_kwargs)
    counter[0] += 1
    return result
  return nest.map_structure(indexed_fn, *args, **kwargs)


class DropoutWrapper(RNNCell):
  """Operator adding dropout to inputs and outputs of the given cell."""

  def __init__(self, cell, input_keep_prob=1.0, output_keep_prob=1.0,
               state_keep_prob=1.0, variational_recurrent=False,
               input_size=None, dtype=None, seed=None):
    """Create a cell with added input, state, and/or output dropout.

    If `variational_recurrent` is set to `True` (**NOT** the default behavior),
    then the the same dropout mask is applied at every step, as described in:

    Y. Gal, Z Ghahramani.  "A Theoretically Grounded Application of Dropout in
    Recurrent Neural Networks".  https://arxiv.org/abs/1512.05287

    Otherwise a different dropout mask is applied at every time step.

    Args:
      cell: an RNNCell, a projection to output_size is added to it.
      input_keep_prob: unit Tensor or float between 0 and 1, input keep
        probability; if it is constant and 1, no input dropout will be added.
      output_keep_prob: unit Tensor or float between 0 and 1, output keep
        probability; if it is constant and 1, no output dropout will be added.
      state_keep_prob: unit Tensor or float between 0 and 1, output keep
        probability; if it is constant and 1, no output dropout will be added.
        State dropout is performed on the *output* states of the cell.
      variational_recurrent: Python bool.  If `True`, then the same
        dropout pattern is applied across all time steps per run call.
        If this parameter is set, `input_size` **must** be provided.
      input_size: (optional) (possibly nested tuple of) `TensorShape` objects
        containing the depth(s) of the input tensors expected to be passed in to
        the `DropoutWrapper`.  Required and used **iff**
         `variational_recurrent = True` and `input_keep_prob < 1`.
      dtype: (optional) The `dtype` of the input, state, and output tensors.
        Required and used **iff** `variational_recurrent = True`.
      seed: (optional) integer, the randomness seed.

    Raises:
      TypeError: if cell is not an RNNCell.
      ValueError: if any of the keep_probs are not between 0 and 1.
    """
    if not isinstance(cell, RNNCell):
      raise TypeError("The parameter cell is not a RNNCell.")
    with ops.name_scope("DropoutWrapperInit"):
      def tensor_and_const_value(v):
        tensor_value = ops.convert_to_tensor(v)
        const_value = tensor_util.constant_value(tensor_value)
        return (tensor_value, const_value)
      for prob, attr in [(input_keep_prob, "input_keep_prob"),
                         (state_keep_prob, "state_keep_prob"),
                         (output_keep_prob, "output_keep_prob")]:
        tensor_prob, const_prob = tensor_and_const_value(prob)
        if const_prob is not None:
          if const_prob < 0 or const_prob > 1:
            raise ValueError("Parameter %s must be between 0 and 1: %d"
                             % (attr, const_prob))
          setattr(self, "_%s" % attr, float(const_prob))
        else:
          setattr(self, "_%s" % attr, tensor_prob)

    # Set cell, variational_recurrent, seed before running the code below
    self._cell = cell
    self._variational_recurrent = variational_recurrent
    self._seed = seed

    self._recurrent_input_noise = None
    self._recurrent_state_noise = None
    self._recurrent_output_noise = None

    if variational_recurrent:
      if dtype is None:
        raise ValueError(
            "When variational_recurrent=True, dtype must be provided")

      def convert_to_batch_shape(s):
        # Prepend a 1 for the batch dimension; for recurrent
        # variational dropout we use the same dropout mask for all
        # batch elements.
        return array_ops.concat(
            ([1], tensor_shape.TensorShape(s).as_list()), 0)

      def batch_noise(s, inner_seed):
        shape = convert_to_batch_shape(s)
        return random_ops.random_uniform(shape, seed=inner_seed, dtype=dtype)

      if (not isinstance(self._input_keep_prob, numbers.Real) or
          self._input_keep_prob < 1.0):
        if input_size is None:
          raise ValueError(
              "When variational_recurrent=True and input_keep_prob < 1.0 or "
              "is unknown, input_size must be provided")
        self._recurrent_input_noise = _enumerated_map_structure(
            lambda i, s: batch_noise(s, inner_seed=self._gen_seed("input", i)),
            input_size)
      self._recurrent_state_noise = _enumerated_map_structure(
          lambda i, s: batch_noise(s, inner_seed=self._gen_seed("state", i)),
          cell.state_size)
      self._recurrent_output_noise = _enumerated_map_str
Download .txt
gitextract_fejrc7rh/

├── .gitattributes
├── AD.py
├── AD_Invert.py
├── DR_discriminator.py
├── README.md
├── RGAN.py
├── data_utils.py
├── differential_privacy/
│   ├── dp_sgd/
│   │   └── dp_optimizer/
│   │       ├── dp_optimizer.py
│   │       ├── sanitizer.py
│   │       └── utils.py
│   └── privacy_accountant/
│       └── tf/
│           └── accountant.py
├── eugenium_mmd.py
├── eval.py
├── experiments/
│   ├── parameters/
│   │   ├── kdd99_30_0.npy
│   │   ├── kdd99_30_1.npy
│   │   ├── kdd99_30_10.npy
│   │   ├── kdd99_30_11.npy
│   │   ├── kdd99_30_12.npy
│   │   ├── kdd99_30_13.npy
│   │   ├── kdd99_30_14.npy
│   │   ├── kdd99_30_15.npy
│   │   ├── kdd99_30_16.npy
│   │   ├── kdd99_30_17.npy
│   │   ├── kdd99_30_18.npy
│   │   ├── kdd99_30_19.npy
│   │   ├── kdd99_30_2.npy
│   │   ├── kdd99_30_20.npy
│   │   ├── kdd99_30_21.npy
│   │   ├── kdd99_30_22.npy
│   │   ├── kdd99_30_3.npy
│   │   ├── kdd99_30_4.npy
│   │   ├── kdd99_30_5.npy
│   │   ├── kdd99_30_6.npy
│   │   ├── kdd99_30_7.npy
│   │   ├── kdd99_30_8.npy
│   │   └── kdd99_30_9.npy
│   ├── plots/
│   │   └── gs/
│   │       └── kdd99_gs_real.npy
│   └── settings/
│       ├── kdd99.txt
│       └── kdd99_test.txt
├── mmd.py
├── mod_core_rnn_cell_impl.py
├── model.py
├── plotting.py
├── tf_ops.py
└── utils.py
Download .txt
SYMBOL INDEX (216 symbols across 16 files)

FILE: AD.py
  class myADclass (line 49) | class myADclass():
    method __init__ (line 50) | def __init__(self, epoch, settings=settings, samples=samples, labels=l...
    method ADfunc (line 56) | def ADfunc(self):

FILE: AD_Invert.py
  class myADclass (line 45) | class myADclass():
    method __init__ (line 46) | def __init__(self, epoch, settings=settings, samples=samples, labels=l...
    method ADfunc (line 52) | def ADfunc(self):

FILE: DR_discriminator.py
  function anomaly_detection_plot (line 10) | def anomaly_detection_plot(D_test, T_mb, L_mb, D_L, epoch, identifier):
  function detection_Comb (line 40) | def detection_Comb(Label_test, L_mb, I_mb, seq_step, tao):
  function detection_logits_I (line 87) | def detection_logits_I(DL_test, L_mb, I_mb, seq_step, tao):
  function detection_statistic_I (line 155) | def detection_statistic_I(D_test, L_mb, I_mb, seq_step, tao):
  function detection_D_I (line 222) | def detection_D_I(DD, L_mb, I_mb, seq_step, tao):
  function detection_R_D_I (line 290) | def detection_R_D_I(DD, Gs, T_mb, L_mb, seq_step, tao, lam):
  function detection_R_I (line 364) | def detection_R_I(Gs, T_mb, L_mb, seq_step, tao):
  function sample_detection (line 432) | def sample_detection(D_test, L_mb, tao):
  function CUSUM_det (line 464) | def CUSUM_det(spe_n, spe_a, labels):
  function SPE (line 531) | def SPE(X, pc):
  function generator_o (line 549) | def generator_o(z, hidden_units_g, seq_length, batch_size, num_generated...
  function discriminator_o (line 589) | def discriminator_o(x, hidden_units_d, reuse=False, parameters=None):
  function invert (line 615) | def invert(settings, samples, para_path, g_tolerance=None, e_tolerance=0.1,
  function dis_trained_model (line 722) | def dis_trained_model(settings, samples, para_path):
  function dis_D_model (line 761) | def dis_D_model(settings, samples, para_path):

FILE: data_utils.py
  function swat (line 24) | def swat(seq_length, seq_step, num_signals, randomize=False):
  function swat_birgan (line 90) | def swat_birgan(seq_length, seq_step, num_signals, randomize=False):
  function swat_test (line 133) | def swat_test(seq_length, seq_step, num_signals, randomize=False):
  function swat_birgan_test (line 200) | def swat_birgan_test(seq_length, seq_step, num_signals, randomize=False):
  function wadi (line 245) | def wadi(seq_length, seq_step, num_signals, randomize=False):
  function wadi_test (line 302) | def wadi_test(seq_length, seq_step, num_signals, randomize=False):
  function kdd99 (line 357) | def kdd99(seq_length, seq_step, num_signals):
  function kdd99_test (line 409) | def kdd99_test(seq_length, seq_step, num_signals):
  function get_samples_and_labels (line 468) | def get_samples_and_labels(settings):
  function get_data (line 544) | def get_data(data_type, seq_length, seq_step, num_signals, sub_id, eval_...
  function get_batch (line 574) | def get_batch(samples, batch_size, batch_idx, labels=None):
  function split (line 589) | def split(samples, proportions, normalise=False, scale=False, labels=Non...

FILE: differential_privacy/dp_sgd/dp_optimizer/dp_optimizer.py
  class DPGradientDescentOptimizer (line 30) | class DPGradientDescentOptimizer(tf.train.GradientDescentOptimizer):
    method __init__ (line 34) | def __init__(self, learning_rate, eps_delta, sanitizer,
    method compute_sanitized_gradients (line 72) | def compute_sanitized_gradients(self, loss, var_list=None,
    method minimize (line 110) | def minimize(self, loss, global_step=None, var_list=None,

FILE: differential_privacy/dp_sgd/dp_optimizer/sanitizer.py
  class AmortizedGaussianSanitizer (line 39) | class AmortizedGaussianSanitizer(object):
    method __init__ (line 47) | def __init__(self, accountant, default_option):
    method set_option (line 59) | def set_option(self, tensor_name, option):
    method sanitize (line 69) | def sanitize(self, x, eps_delta, sigma=None,

FILE: differential_privacy/dp_sgd/dp_optimizer/utils.py
  class LayerParameters (line 29) | class LayerParameters(object):
    method __init__ (line 31) | def __init__(self):
  class ConvParameters (line 42) | class ConvParameters(object):
    method __init__ (line 44) | def __init__(self):
  class NetworkParameters (line 62) | class NetworkParameters(object):
    method __init__ (line 64) | def __init__(self):
  function GetTensorOpName (line 73) | def GetTensorOpName(x):
  function BuildNetwork (line 91) | def BuildNetwork(inputs, network_parameters):
  function VaryRate (line 199) | def VaryRate(start, end, saturate_epochs, epoch):
  function BatchClipByL2norm (line 223) | def BatchClipByL2norm(t, upper_bound, name=None):
  function SoftThreshold (line 254) | def SoftThreshold(t, threshold_ratio, name=None):
  function AddGaussianNoise (line 283) | def AddGaussianNoise(t, sigma, name=None):
  function GenerateBinomialTable (line 300) | def GenerateBinomialTable(m):

FILE: differential_privacy/privacy_accountant/tf/accountant.py
  class AmortizedAccountant (line 50) | class AmortizedAccountant(object):
    method __init__ (line 59) | def __init__(self, total_examples):
    method accumulate_privacy_spending (line 73) | def accumulate_privacy_spending(self, eps_delta, unused_sigma,
    method get_privacy_spent (line 108) | def get_privacy_spent(self, sess, target_eps=None):
  class MomentsAccountant (line 127) | class MomentsAccountant(object):
    method __init__ (line 179) | def __init__(self, total_examples, moment_orders=32):
    method _compute_log_moment (line 200) | def _compute_log_moment(self, sigma, q, moment_order):
    method accumulate_privacy_spending (line 212) | def accumulate_privacy_spending(self, unused_eps_delta,
    method _compute_delta (line 241) | def _compute_delta(self, log_moments, eps):
    method _compute_eps (line 261) | def _compute_eps(self, log_moments, delta):
    method get_privacy_spent (line 270) | def get_privacy_spent(self, sess, target_eps=None, target_deltas=None):
  class GaussianMomentsAccountant (line 299) | class GaussianMomentsAccountant(MomentsAccountant):
    method __init__ (line 332) | def __init__(self, total_examples, moment_orders=32):
    method _differential_moments (line 342) | def _differential_moments(self, sigma, s, t):
    method _compute_log_moment (line 379) | def _compute_log_moment(self, sigma, q, moment_order):
  class DummyAccountant (line 407) | class DummyAccountant(object):
    method accumulate_privacy_spending (line 410) | def accumulate_privacy_spending(self, *unused_args):
    method get_privacy_spent (line 413) | def get_privacy_spent(self, unused_sess, **unused_kwargs):

FILE: eugenium_mmd.py
  function my_kernel (line 21) | def my_kernel(X, Y, sigma):
  function MMD_3_Sample_Test (line 36) | def MMD_3_Sample_Test(X, Y, Z, sigma=-1, SelectSigma=True, computeMMDs=F...
  function MMD_Diff_Var (line 92) | def MMD_Diff_Var(Kyy, Kzz, Kxy, Kxz):
  function grbf (line 151) | def grbf(x1, x2, sigma):
  function kernelwidthPair (line 174) | def kernelwidthPair(x1, x2):
  function kernelwidth (line 204) | def kernelwidth(Zmed):
  function MMD_unbiased (line 229) | def MMD_unbiased(Kxx, Kyy, Kxy):

FILE: eval.py
  function assert_same_data (line 33) | def assert_same_data(A, B):
  function model_memorisation (line 48) | def model_memorisation(identifier, epoch, max_samples=2000, tstr=False):
  function model_comparison (line 128) | def model_comparison(identifier_A, identifier_B, epoch_A=99, epoch_B=99):
  function get_reconstruction_errors (line 160) | def get_reconstruction_errors(identifier, epoch, g_tolerance=0.05, max_s...
  function error_per_sample (line 261) | def error_per_sample(identifier, epoch, samples, n_rep=3, n_iter=None, g...
  function view_digit (line 281) | def view_digit(identifier, epoch, digit, n_samples=6):
  function view_interpolation (line 301) | def view_interpolation(identifier, epoch, n_steps=6, input_samples=None,...
  function view_latent_vary (line 332) | def view_latent_vary(identifier, epoch, n_steps=6):
  function view_reconstruction (line 343) | def view_reconstruction(identifier, epoch, real_samples, tolerance=1):
  function view_fixed (line 356) | def view_fixed(identifier, epoch, n_samples=6, dim=None):
  function view_params (line 373) | def view_params(identifier, epoch):
  function sample_distance (line 382) | def sample_distance(sampleA, sampleB, sigma):
  function train_CNN (line 394) | def train_CNN(train_X, train_Y, vali_X, vali_Y, test_X):
  function TSTR_mnist (line 432) | def TSTR_mnist(identifier, epoch, generate=True, duplicate_synth=1, vali...
  function TSTR_eICU (line 542) | def TSTR_eICU(identifier, epoch, generate=True, vali=True, CNN=False, do...
  function NIPS_toy_plot (line 682) | def NIPS_toy_plot(identifier_rbf, epoch_rbf, identifier_sine, epoch_sine...

FILE: mmd.py
  function _mix_rbf_kernel (line 21) | def _mix_rbf_kernel(X, Y, sigmas, wts=None):
  function rbf_mmd2 (line 57) | def rbf_mmd2(X, Y, sigma=1, biased=True):
  function mix_rbf_mmd2 (line 61) | def mix_rbf_mmd2(X, Y, sigmas=(1,), wts=None, biased=True):
  function rbf_mmd2_and_ratio (line 66) | def rbf_mmd2_and_ratio(X, Y, sigma=1, biased=True):
  function mix_rbf_mmd2_and_ratio (line 70) | def mix_rbf_mmd2_and_ratio(X, Y, sigmas=(1,), wts=None, biased=True):
  function _mmd2 (line 79) | def _mmd2(K_XX, K_XY, K_YY, const_diagonal=False, biased=False):
  function _mmd2_and_ratio (line 102) | def _mmd2_and_ratio(K_XX, K_XY, K_YY, const_diagonal=False, biased=False,
  function _mmd2_and_variance (line 110) | def _mmd2_and_variance(K_XX, K_XY, K_YY, const_diagonal=False, biased=Fa...
  function median_pairwise_distance (line 172) | def median_pairwise_distance(X, Y=None):
  function median_pairwise_distance_o (line 203) | def median_pairwise_distance_o(X, Y=None):

FILE: mod_core_rnn_cell_impl.py
  function _checked_scope (line 57) | def _checked_scope(cell, scope, reuse=None, **kwargs):
  class BasicRNNCell (line 103) | class BasicRNNCell(RNNCell):
    method __init__ (line 106) | def __init__(self, num_units, input_size=None, activation=tanh, reuse=...
    method state_size (line 114) | def state_size(self):
    method output_size (line 118) | def output_size(self):
    method __call__ (line 121) | def __call__(self, inputs, state, scope=None):
  class GRUCell (line 129) | class GRUCell(RNNCell):
    method __init__ (line 132) | def __init__(self, num_units, input_size=None, activation=tanh, reuse=...
    method state_size (line 140) | def state_size(self):
    method output_size (line 144) | def output_size(self):
    method __call__ (line 147) | def __call__(self, inputs, state, scope=None):
  class LSTMStateTuple (line 168) | class LSTMStateTuple(_LSTMStateTuple):
    method dtype (line 178) | def dtype(self):
  class BasicLSTMCell (line 186) | class BasicLSTMCell(RNNCell):
    method __init__ (line 200) | def __init__(self, num_units, forget_bias=1.0, input_size=None,
    method state_size (line 228) | def state_size(self):
    method output_size (line 233) | def output_size(self):
    method __call__ (line 236) | def __call__(self, inputs, state, scope=None):
  class LSTMCell (line 260) | class LSTMCell(RNNCell):
    method __init__ (line 282) | def __init__(self, num_units, input_size=None,
    method state_size (line 357) | def state_size(self):
    method output_size (line 361) | def output_size(self):
    method __call__ (line 364) | def __call__(self, inputs, state, scope=None):
  class OutputProjectionWrapper (line 459) | class OutputProjectionWrapper(RNNCell):
    method __init__ (line 468) | def __init__(self, cell, output_size, reuse=None):
    method state_size (line 491) | def state_size(self):
    method output_size (line 495) | def output_size(self):
    method zero_state (line 498) | def zero_state(self, batch_size, dtype):
    method __call__ (line 502) | def __call__(self, inputs, state, scope=None):
  class InputProjectionWrapper (line 512) | class InputProjectionWrapper(RNNCell):
    method __init__ (line 520) | def __init__(self, cell, num_proj, input_size=None):
    method state_size (line 539) | def state_size(self):
    method output_size (line 543) | def output_size(self):
    method zero_state (line 546) | def zero_state(self, batch_size, dtype):
    method __call__ (line 550) | def __call__(self, inputs, state, scope=None):
  function _enumerated_map_structure (line 558) | def _enumerated_map_structure(map_fn, *args, **kwargs):
  class DropoutWrapper (line 567) | class DropoutWrapper(RNNCell):
    method __init__ (line 570) | def __init__(self, cell, input_keep_prob=1.0, output_keep_prob=1.0,
    method _gen_seed (line 667) | def _gen_seed(self, salt_prefix, index):
    method state_size (line 675) | def state_size(self):
    method output_size (line 679) | def output_size(self):
    method zero_state (line 682) | def zero_state(self, batch_size, dtype):
    method _variational_recurrent_dropout_value (line 686) | def _variational_recurrent_dropout_value(
    method _dropout (line 698) | def _dropout(self, values, salt_prefix, recurrent_noise, keep_prob):
    method __call__ (line 710) | def __call__(self, inputs, state, scope=None):
  class ResidualWrapper (line 731) | class ResidualWrapper(RNNCell):
    method __init__ (line 734) | def __init__(self, cell):
    method state_size (line 743) | def state_size(self):
    method output_size (line 747) | def output_size(self):
    method zero_state (line 750) | def zero_state(self, batch_size, dtype):
    method __call__ (line 754) | def __call__(self, inputs, state, scope=None):
  class DeviceWrapper (line 780) | class DeviceWrapper(RNNCell):
    method __init__ (line 783) | def __init__(self, cell, device):
    method state_size (line 796) | def state_size(self):
    method output_size (line 800) | def output_size(self):
    method zero_state (line 803) | def zero_state(self, batch_size, dtype):
    method __call__ (line 807) | def __call__(self, inputs, state, scope=None):
  class EmbeddingWrapper (line 813) | class EmbeddingWrapper(RNNCell):
    method __init__ (line 822) | def __init__(self, cell, embedding_classes, embedding_size, initialize...
    method state_size (line 852) | def state_size(self):
    method output_size (line 856) | def output_size(self):
    method zero_state (line 859) | def zero_state(self, batch_size, dtype):
    method __call__ (line 863) | def __call__(self, inputs, state, scope=None):
  class MultiRNNCell (line 890) | class MultiRNNCell(RNNCell):
    method __init__ (line 893) | def __init__(self, cells, state_is_tuple=True):
    method state_size (line 922) | def state_size(self):
    method output_size (line 929) | def output_size(self):
    method zero_state (line 932) | def zero_state(self, batch_size, dtype):
    method __call__ (line 941) | def __call__(self, inputs, state, scope=None):
  class _SlimRNNCell (line 966) | class _SlimRNNCell(RNNCell):
    method __init__ (line 969) | def __init__(self, cell_fn):
    method state_size (line 998) | def state_size(self):
    method output_size (line 1002) | def output_size(self):
    method __call__ (line 1005) | def __call__(self, inputs, state, scope=None):
  function _linear (line 1011) | def _linear(args, output_size, bias, bias_start=0.0, scope=None):

FILE: model.py
  function sample_Z (line 25) | def sample_Z(batch_size, seq_length, latent_dim, use_time=False, use_noi...
  function sample_T (line 34) | def sample_T(batch_size, batch_idx):
  function sample_TT (line 46) | def sample_TT(batch_size):
  function train_epoch (line 57) | def train_epoch(epoch, samples, labels, sess, Z, X, D_loss, G_loss, D_so...
  function GAN_loss (line 88) | def GAN_loss(Z, X, generator_settings, discriminator_settings):
  function GAN_solvers (line 114) | def GAN_solvers(D_loss, G_loss, learning_rate, batch_size, total_example...
  function create_placeholders (line 150) | def create_placeholders(batch_size, seq_length, latent_dim, num_signals):
  function generator (line 157) | def generator(z, hidden_units_g, seq_length, batch_size, num_signals, re...
  function discriminator (line 209) | def discriminator(x, hidden_units_d, seq_length, batch_size, reuse=False...
  function display_batch_progression (line 250) | def display_batch_progression(j, id_max):
  function dump_parameters (line 261) | def dump_parameters(identifier, sess):
  function load_parameters (line 275) | def load_parameters(identifier):

FILE: plotting.py
  function plot_label (line 10) | def plot_label(label, id):
  function visualise_at_epoch (line 20) | def visualise_at_epoch(vis_sample, data, predict_labels, epoch,
  function save_plot_sample (line 48) | def save_plot_sample(samples, idx, identifier, n_samples=16, num_epochs=...
  function save_plot_interpolate (line 77) | def save_plot_interpolate(input_samples, samples, idx, identifier,  num_...
  function reconstruction_errors (line 130) | def reconstruction_errors(identifier, train_errors, vali_errors,
  function save_plot_reconstruct (line 158) | def save_plot_reconstruct(real_samples, model_samples, identifier):
  function save_plot_vary_dimension (line 180) | def save_plot_vary_dimension(samples_list, idx, identifier, n_dim):
  function interpolate (line 213) | def interpolate(sampleA, sampleB=None, n_steps=6):
  function vary_latent_dimension (line 224) | def vary_latent_dimension(sample, dimension, n_steps=6):
  function plot_sine_evaluation (line 235) | def plot_sine_evaluation(real_samples, fake_samples, idx, identifier):
  function plot_trace (line 270) | def plot_trace(identifier, xmax=250, final=False, dp=False):
  function save_samples (line 386) | def save_samples(vis_sample, identifier, epoch):
  function save_samples_real (line 392) | def save_samples_real(vis_real, identifier):
  function save_mnist_plot_sample (line 398) | def save_mnist_plot_sample(samples, idx, identifier, n_samples, labels=N...
  function visualise_latent (line 436) | def visualise_latent(Z, identifier):
  function plot_parameters (line 456) | def plot_parameters(parameters, identifier):
  function view_mnist_eval (line 491) | def view_mnist_eval(identifier, train_X, train_Y, synth_X, synth_Y, test...
  function nips_plot_rbf (line 544) | def nips_plot_rbf(sample, index, which='train'):
  function nips_plot_sine (line 570) | def nips_plot_sine(sample, index, which='train'):
  function nips_plot_mnist (line 599) | def nips_plot_mnist(sample, index, which='train'):

FILE: tf_ops.py
  function sq_sum (line 5) | def sq_sum(t, name=None):
  function dot (line 12) | def dot(x, y, name=None):

FILE: utils.py
  function rgan_options_parser (line 6) | def rgan_options_parser():
  function load_settings_from_file (line 93) | def load_settings_from_file(settings):
Condensed preview — 45 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (265K chars).
[
  {
    "path": ".gitattributes",
    "chars": 66,
    "preview": "# Auto detect text files and perform LF normalization\n* text=auto\n"
  },
  {
    "path": "AD.py",
    "chars": 6507,
    "preview": "import tensorflow as tf\r\nimport numpy as np\r\nimport pdb\r\nimport json\r\nimport model\r\nfrom mod_core_rnn_cell_impl import L"
  },
  {
    "path": "AD_Invert.py",
    "chars": 7849,
    "preview": "import tensorflow as tf\r\nimport numpy as np\r\nimport pdb\r\nimport json\r\nfrom mod_core_rnn_cell_impl import LSTMCell  # mod"
  },
  {
    "path": "DR_discriminator.py",
    "chars": 25532,
    "preview": "import numpy as np\nimport tensorflow as tf\nimport matplotlib.pyplot as plt\nfrom matplotlib.colors import hsv_to_rgb\nimpo"
  },
  {
    "path": "README.md",
    "chars": 1790,
    "preview": "# -- Multivariate Anomaly Detection for Time Series Data with GANs -- #\n\n# MAD-GAN\n\nThis repository contains code for th"
  },
  {
    "path": "RGAN.py",
    "chars": 8594,
    "preview": "import numpy as np\r\nimport tensorflow as tf\r\nimport pdb\r\nimport random\r\nimport json\r\nfrom scipy.stats import mode\r\n\r\nimp"
  },
  {
    "path": "data_utils.py",
    "chars": 24447,
    "preview": "import numpy as np\nimport pandas as pd\nimport pdb\nimport re\nfrom time import time\nimport json\nimport random\n\nimport mode"
  },
  {
    "path": "differential_privacy/dp_sgd/dp_optimizer/dp_optimizer.py",
    "chars": 10050,
    "preview": "# (originally from https://github.com/tensorflow/models/tree/master/research/differential_privacy,\n# possibly with some "
  },
  {
    "path": "differential_privacy/dp_sgd/dp_optimizer/sanitizer.py",
    "chars": 4587,
    "preview": "# (originally from https://github.com/tensorflow/models/tree/master/research/differential_privacy,\n# possibly with some "
  },
  {
    "path": "differential_privacy/dp_sgd/dp_optimizer/utils.py",
    "chars": 10974,
    "preview": "# (originally from https://github.com/tensorflow/models/tree/master/research/differential_privacy,\n# possibly with some "
  },
  {
    "path": "differential_privacy/privacy_accountant/tf/accountant.py",
    "chars": 17725,
    "preview": "# (originally from https://github.com/tensorflow/models/tree/master/research/differential_privacy,\n# possibly with some "
  },
  {
    "path": "eugenium_mmd.py",
    "chars": 6961,
    "preview": "'''\nCode taken from: https://github.com/eugenium/mmd\n(modified slightly for efficiency/PEP by Stephanie Hyland)\n\nPython "
  },
  {
    "path": "eval.py",
    "chars": 36174,
    "preview": "#!/usr/bin/env ipython\n# Evaluation of models\n#\n\nimport json\nimport pdb\nimport numpy as np\nimport pandas as pd\nfrom euge"
  },
  {
    "path": "experiments/settings/kdd99.txt",
    "chars": 891,
    "preview": "{\r\n\"settings_file\": \"\",\r\n\"data\": \"kdd99\",\r\n\"seq_length\": 30,\r\n\"num_signals\": 6,\r\n\"normalise\": false,\r\n\"scale\": 0.1,\r\n\"fr"
  },
  {
    "path": "experiments/settings/kdd99_test.txt",
    "chars": 901,
    "preview": "{\r\n\"settings_file\": \"\",\r\n\"data\": \"kdd99_test\",\r\n\"seq_length\": 30,\r\n\"num_signals\": 6,\r\n\"normalise\": false,\r\n\"scale\": 0.1,"
  },
  {
    "path": "mmd.py",
    "chars": 8101,
    "preview": "'''\nMMD functions implemented in tensorflow.\n(from https://github.com/dougalsutherland/opt-mmd/blob/master/gan/mmd.py)\n'"
  },
  {
    "path": "mod_core_rnn_cell_impl.py",
    "chars": 41293,
    "preview": "# Copyright 2015 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"L"
  },
  {
    "path": "model.py",
    "chars": 12352,
    "preview": "import tensorflow as tf\nimport numpy as np\n# from data_utils import get_batch\nimport data_utils\nimport pdb\nimport json\ni"
  },
  {
    "path": "plotting.py",
    "chars": 23410,
    "preview": "import numpy as np\nimport matplotlib as mpl\nmpl.use('Agg')\nimport matplotlib.pyplot as plt\nimport pdb\nfrom time import t"
  },
  {
    "path": "tf_ops.py",
    "chars": 681,
    "preview": "### from https://github.com/eugenium/MMD/blob/master/tf_ops.py\nimport tensorflow as tf\n\n\ndef sq_sum(t, name=None):\n    \""
  },
  {
    "path": "utils.py",
    "chars": 6099,
    "preview": "#!/usr/bin/env ipython\n# Utility functions that don't fit in other scripts\nimport argparse\nimport json\n\ndef rgan_options"
  }
]

// ... and 24 more files (download for full content)

About this extraction

This page contains the full source code of the LiDan456/MAD-GANs GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 45 files (249.0 KB), approximately 68.1k tokens, and a symbol index with 216 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!