[
  {
    "path": ".gitattributes",
    "content": "# Auto detect text files and perform LF normalization\n* text=auto\n"
  },
  {
    "path": "AD.py",
    "content": "import tensorflow as tf\r\nimport numpy as np\r\nimport pdb\r\nimport json\r\nimport model\r\nfrom mod_core_rnn_cell_impl import LSTMCell  # modified to allow initializing bias in lstm\r\n\r\nimport utils\r\nimport eval\r\nimport DR_discriminator\r\nimport data_utils\r\n\r\n# from pyod.utils.utility import *\r\nfrom sklearn.utils.validation import *\r\nfrom sklearn.metrics.classification import *\r\nfrom sklearn.metrics.ranking import *\r\nfrom time import time\r\n\r\nbegin = time()\r\n\r\n\"\"\"\r\nHere, only the discriminator was used to do the anomaly detection\r\n\"\"\"\r\n\r\n# --- get settings --- #\r\n# parse command line arguments, or use defaults\r\nparser = utils.rgan_options_parser()\r\nsettings = vars(parser.parse_args())\r\n# if a settings file is specified, it overrides command line arguments/defaults\r\nif settings['settings_file']: settings = utils.load_settings_from_file(settings)\r\n\r\n# --- get data, split --- #\r\ndata_path = './experiments/data/' + settings['data_load_from'] + '.data.npy'\r\nprint('Loading data from', data_path)\r\nsettings[\"eval_single\"] = False\r\nsettings[\"eval_an\"] = False\r\nsamples, labels, index = data_utils.get_data(settings[\"data\"], settings[\"seq_length\"], settings[\"seq_step\"],\r\n                                             settings[\"num_signals\"], settings[\"sub_id\"], settings[\"eval_single\"],\r\n                                             settings[\"eval_an\"], data_path)\r\n# --- save settings, data --- #\r\n# no need\r\nprint('Ready to run with settings:')\r\nfor (k, v) in settings.items(): print(v, '\\t', k)\r\n# add the settings to local environment\r\n# WARNING: at this point a lot of variables appear\r\nlocals().update(settings)\r\njson.dump(settings, open('./experiments/settings/' + identifier + '.txt', 'w'), indent=0)\r\n\r\nclass myADclass():\r\n    def __init__(self, epoch, settings=settings, samples=samples, labels=labels, index=index):\r\n        self.epoch = epoch\r\n     
   self.settings = settings\r\n        self.samples = samples\r\n        self.labels = labels\r\n        self.index = index\r\n    def ADfunc(self):\r\n        num_samples_t = self.samples.shape[0]\r\n        print('sample_shape:', self.samples.shape[0])\r\n        print('num_samples_t', num_samples_t)\r\n\r\n        # -- only discriminate one batch for one time -- #\r\n        D_test = np.empty([num_samples_t, self.settings['seq_length'], 1])\r\n        DL_test = np.empty([num_samples_t, self.settings['seq_length'], 1])\r\n        L_mb = np.empty([num_samples_t, self.settings['seq_length'], 1])\r\n        I_mb = np.empty([num_samples_t, self.settings['seq_length'], 1])\r\n        batch_times = num_samples_t // self.settings['batch_size']\r\n        for batch_idx in range(0, num_samples_t // self.settings['batch_size']):\r\n            # print('batch_idx:{}\r\n            # display batch progress\r\n            model.display_batch_progression(batch_idx, batch_times)\r\n            start_pos = batch_idx * self.settings['batch_size']\r\n            end_pos = start_pos + self.settings['batch_size']\r\n            T_mb = self.samples[start_pos:end_pos, :, :]\r\n            L_mmb = self.labels[start_pos:end_pos, :, :]\r\n            I_mmb = self.index[start_pos:end_pos, :, :]\r\n            para_path = './experiments/parameters/' + self.settings['sub_id'] + '_' + str(\r\n                self.settings['seq_length']) + '_' + str(self.epoch) + '.npy'\r\n            D_t, L_t = DR_discriminator.dis_trained_model(self.settings, T_mb, para_path)\r\n            D_test[start_pos:end_pos, :, :] = D_t\r\n            DL_test[start_pos:end_pos, :, :] = L_t\r\n            L_mb[start_pos:end_pos, :, :] = L_mmb\r\n            I_mb[start_pos:end_pos, :, :] = I_mmb\r\n\r\n        start_pos = (num_samples_t // self.settings['batch_size']) * self.settings['batch_size']\r\n        end_pos = start_pos + self.settings['batch_size']\r\n        size = samples[start_pos:end_pos, :, 
:].shape[0]\r\n        fill = np.ones([self.settings['batch_size'] - size, samples.shape[1], samples.shape[2]])\r\n        batch = np.concatenate([samples[start_pos:end_pos, :, :], fill], axis=0)\r\n        para_path = './experiments/parameters/' + self.settings['sub_id'] + '_' + str(\r\n            self.settings['seq_length']) + '_' + str(self.epoch) + '.npy'\r\n        D_t, L_t = DR_discriminator.dis_trained_model(self.settings, batch, para_path)\r\n        L_mmb = self.labels[start_pos:end_pos, :, :]\r\n        I_mmb = self.index[start_pos:end_pos, :, :]\r\n        D_test[start_pos:end_pos, :, :] = D_t[:size, :, :]\r\n        DL_test[start_pos:end_pos, :, :] = L_t[:size, :, :]\r\n        L_mb[start_pos:end_pos, :, :] = L_mmb\r\n        I_mb[start_pos:end_pos, :, :] = I_mmb\r\n\r\n        results = np.zeros([18, 4])\r\n        for i in range(2, 8):\r\n            tao = 0.1 * i\r\n            Accu2, Pre2, Rec2, F12 = DR_discriminator.detection_Comb(\r\n                DL_test, L_mb, I_mb, self.settings['seq_step'], tao)\r\n            print('seq_length:', self.settings['seq_length'])\r\n            print('Comb-logits-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}'\r\n                  .format(self.epoch, tao, Accu2, Pre2, Rec2, F12))\r\n            results[i - 2, :] = [Accu2, Pre2, Rec2, F12]\r\n\r\n            Accu3, Pre3, Rec3, F13 = DR_discriminator.detection_Comb(\r\n                D_test, L_mb, I_mb, self.settings['seq_step'], tao)\r\n            print('seq_length:', self.settings['seq_length'])\r\n            print('Comb-statistic-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}'\r\n                  .format(self.epoch, tao, Accu3, Pre3, Rec3, F13))\r\n            results[i - 2+6, :] = [Accu3, Pre3, Rec3, F13]\r\n\r\n            Accu5, Pre5, Rec5, F15 = DR_discriminator.sample_detection(D_test, L_mb, tao)\r\n            print('seq_length:', self.settings['seq_length'])\r\n            
print('sample-wise-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}'\r\n                  .format(self.epoch, tao, Accu5, Pre5, Rec5, F15))\r\n            results[i - 2+12, :] = [Accu5, Pre5, Rec5, F15]\r\n\r\n        return results\r\n\r\nif __name__ == \"__main__\":\r\n    print('Main Starting...')\r\n\r\n    Results = np.empty([settings['num_epochs'], 18, 4])\r\n\r\n    for epoch in range(settings['num_epochs']):\r\n    # for epoch in range(50, 60):\r\n        ob = myADclass(epoch)\r\n        Results[epoch, :, :] = ob.ADfunc()\r\n\r\n    # res_path = './experiments/plots/Results' + '_' + settings['sub_id'] + '_' + str(\r\n    #     settings['seq_length']) + '.npy'\r\n    # np.save(res_path, Results)\r\n\r\n    print('Main Terminating...')\r\n    end = time() - begin\r\n    print('Testing terminated | Training time=%d s' % (end))"
  },
  {
    "path": "AD_Invert.py",
    "content": "import tensorflow as tf\r\nimport numpy as np\r\nimport pdb\r\nimport json\r\nfrom mod_core_rnn_cell_impl import LSTMCell  # modified to allow initializing bias in lstm\r\n\r\nimport data_utils\r\nimport plotting\r\nimport model\r\nimport mmd\r\nimport utils\r\nimport eval\r\nimport DR_discriminator\r\n\r\nfrom differential_privacy.dp_sgd.dp_optimizer import dp_optimizer\r\nfrom differential_privacy.dp_sgd.dp_optimizer import sanitizer\r\nfrom differential_privacy.privacy_accountant.tf import accountant\r\n\r\n\"\"\"\r\nHere, both the discriminator and generator were used to do the anomaly detection\r\n\"\"\"\r\n\r\n# --- get settings --- #\r\n# parse command line arguments, or use defaults\r\nparser = utils.rgan_options_parser()\r\nsettings = vars(parser.parse_args())\r\n# if a settings file is specified, it overrides command line arguments/defaults\r\nif settings['settings_file']: settings = utils.load_settings_from_file(settings)\r\n\r\n# --- get data, split --- #\r\ndata_path = './experiments/data/' + settings['data_load_from'] + '.data.npy'\r\nprint('Loading data from', data_path)\r\nsamples, labels, index = data_utils.get_data(settings[\"data\"], settings[\"seq_length\"], settings[\"seq_step\"],\r\n                                             settings[\"num_signals\"], settings[\"sub_id\"], settings[\"eval_single\"],\r\n                                             settings[\"eval_an\"], data_path)\r\n# --- save settings, data --- #\r\n# no need\r\nprint('Ready to run with settings:')\r\nfor (k, v) in settings.items(): print(v, '\\t', k)\r\n# add the settings to local environment\r\n# WARNING: at this point a lot of variables appear\r\nlocals().update(settings)\r\njson.dump(settings, open('./experiments/settings/' + identifier + '.txt', 'w'), indent=0)\r\n\r\nclass myADclass():\r\n    def __init__(self, epoch, settings=settings, samples=samples, labels=labels, index=index):\r\n        self.epoch = epoch\r\n        self.settings = settings\r\n   
     self.samples = samples\r\n        self.labels = labels\r\n        self.index = index\r\n    def ADfunc(self):\r\n        num_samples_t = self.samples.shape[0]\r\n        t_size = 500\r\n        T_index = np.random.choice(num_samples_t, size=t_size, replace=False)\r\n        print('sample_shape:', self.samples.shape[0])\r\n        print('num_samples_t', num_samples_t)\r\n\r\n        # -- only discriminate one batch for one time -- #\r\n        D_test = np.empty([t_size, self.settings['seq_length'], 1])\r\n        DL_test = np.empty([t_size, self.settings['seq_length'], 1])\r\n        GG = np.empty([t_size, self.settings['seq_length'], self.settings['num_signals']])\r\n        T_samples = np.empty([t_size, self.settings['seq_length'], self.settings['num_signals']])\r\n        L_mb = np.empty([t_size, self.settings['seq_length'], 1])\r\n        I_mb = np.empty([t_size, self.settings['seq_length'], 1])\r\n        for batch_idx in range(0, t_size):\r\n            # print('epoch:{}'.format(self.epoch))\r\n            # print('batch_idx:{}'.format(batch_idx))\r\n            # display batch progress\r\n            model.display_batch_progression(batch_idx, t_size)\r\n            T_mb = self.samples[T_index[batch_idx], :, :]\r\n            L_mmb = self.labels[T_index[batch_idx], :, :]\r\n            I_mmb = self.index[T_index[batch_idx], :, :]\r\n            para_path = './experiments/parameters/' + self.settings['sub_id'] + '_' + str(\r\n                self.settings['seq_length']) + '_' + str(self.epoch) + '.npy'\r\n            D_t, L_t = DR_discriminator.dis_D_model(self.settings, T_mb, para_path)\r\n            Gs, Zs, error_per_sample, heuristic_sigma = DR_discriminator.invert(self.settings, T_mb, para_path,\r\n                                                                                g_tolerance=None,\r\n                                                                                e_tolerance=0.1, n_iter=None,\r\n                                            
                                    max_iter=1000,\r\n                                                                                heuristic_sigma=None)\r\n            GG[batch_idx, :, :] = Gs\r\n            T_samples[batch_idx, :, :] = T_mb\r\n            D_test[batch_idx, :, :] = D_t\r\n            DL_test[batch_idx, :, :] = L_t\r\n            L_mb[batch_idx, :, :] = L_mmb\r\n            I_mb[batch_idx, :, :] = I_mmb\r\n\r\n        # -- use self-defined evaluation functions -- #\r\n        # -- test different tao values for the detection function -- #\r\n        results = np.empty([5, 5])\r\n        # for i in range(2, 8):\r\n        #     tao = 0.1 * i\r\n        tao = 0.5\r\n        lam = 0.8\r\n        Accu1, Pre1, Rec1, F11, FPR1, D_L1 = DR_discriminator.detection_D_I(DL_test, L_mb, I_mb, self.settings['seq_step'], tao)\r\n        print('seq_length:', self.settings['seq_length'])\r\n        print('D:Comb-logits-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}; FPR: {:.4}'\r\n              .format(self.epoch, tao, Accu1, Pre1, Rec1, F11, FPR1))\r\n        results[0, :] = [Accu1, Pre1, Rec1, F11, FPR1]\r\n\r\n        Accu2, Pre2, Rec2, F12, FPR2, D_L2 = DR_discriminator.detection_D_I(D_test, L_mb, I_mb, self.settings['seq_step'], tao)\r\n        print('seq_length:', self.settings['seq_length'])\r\n        print('D:Comb-statistic-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}; FPR: {:.4}'\r\n              .format(self.epoch, tao, Accu2, Pre2, Rec2, F12, FPR2))\r\n        results[1, :] = [Accu2, Pre2, Rec2, F12, FPR2]\r\n\r\n        Accu3, Pre3, Rec3, F13, FPR3, D_L3 = DR_discriminator.detection_R_D_I(DL_test, GG, T_samples, L_mb, self.settings['seq_step'], tao, lam)\r\n        print('seq_length:', self.settings['seq_length'])\r\n        print('RD:Comb-logits_based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}; FPR: {:.4}'\r\n            .format(self.epoch, tao, Accu3, Pre3, Rec3, 
F13, FPR3))\r\n        results[2, :] = [Accu3, Pre3, Rec3, F13, FPR3]\r\n\r\n        Accu4, Pre4, Rec4, F14, FPR4, D_L4 = DR_discriminator.detection_R_D_I(D_test, GG, T_samples, L_mb, self.settings['seq_step'], tao, lam)\r\n        print('seq_length:', self.settings['seq_length'])\r\n        print('RD:Comb-statistic-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}; FPR: {:.4}'\r\n              .format(self.epoch, tao, Accu4, Pre4, Rec4, F14, FPR4))\r\n        results[3, :] = [Accu4, Pre4, Rec4, F14, FPR4]\r\n\r\n        Accu5, Pre5, Rec5, F15, FPR5, D_L5 = DR_discriminator.detection_R_I(GG, T_samples, L_mb, self.settings['seq_step'],tao)\r\n        print('seq_length:', self.settings['seq_length'])\r\n        print('G:Comb-sample-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}; FPR: {:.4}'\r\n              .format(self.epoch, tao, Accu5, Pre5, Rec5, F15, FPR5))\r\n        results[4, :] = [Accu5, Pre5, Rec5, F15, FPR5]\r\n\r\n        return results, GG, D_test, DL_test\r\n\r\n\r\n\r\nif __name__ == \"__main__\":\r\n    print('Main Starting...')\r\n\r\n    Results = np.empty([settings['num_epochs'], 5, 5])\r\n\r\n    t_size = 500\r\n    D_test = np.empty([settings['num_epochs'], t_size, settings['seq_length'], 1])\r\n    DL_test = np.empty([settings['num_epochs'], t_size, settings['seq_length'], 1])\r\n    GG = np.empty([settings['num_epochs'], t_size, settings['seq_length'], settings['num_signals']])\r\n\r\n    for epoch in range(settings['num_epochs']):\r\n    # for epoch in range(1):\r\n        ob = myADclass(epoch)\r\n        Results[epoch, :, :], GG[epoch, :, :, :], D_test[epoch, :, :, :], DL_test[epoch, :, :, :] = ob.ADfunc()\r\n\r\n    res_path = './experiments/plots/Results_Invert' + '_' + settings['sub_id'] + '_' + str(\r\n        settings['seq_length']) + '.npy'\r\n    np.save(res_path, Results)\r\n\r\n    dg_path = './experiments/plots/DG_Invert' + '_' + settings['sub_id'] + '_' + str(\r\n        
settings['seq_length']) + '_'\r\n    np.save(dg_path + 'D_test.npy', D_test)\r\n    np.save(dg_path + 'DL_test.npy', DL_test)\r\n    np.save(dg_path + 'GG.npy', GG)\r\n\r\n    print('Main Terminating...')"
  },
  {
    "path": "DR_discriminator.py",
    "content": "import numpy as np\nimport tensorflow as tf\nimport matplotlib.pyplot as plt\nfrom matplotlib.colors import hsv_to_rgb\nimport model\nimport mmd\nfrom mod_core_rnn_cell_impl import LSTMCell\nfrom sklearn.metrics import precision_recall_fscore_support\n\ndef anomaly_detection_plot(D_test, T_mb, L_mb, D_L, epoch, identifier):\n\n    aa = D_test.shape[0]\n    bb = D_test.shape[1]\n    D_L = D_L.reshape([aa, bb, -1])\n\n    x_points = np.arange(bb)\n\n    fig, ax = plt.subplots(4, 4, sharex=True)\n    for m in range(4):\n        for n in range(4):\n            D = D_test[n * 4 + m, :, :]\n            T = T_mb[n * 4 + m, :, :]\n            L = L_mb[n * 4 + m, :, :]\n            DL = D_L[n * 4 + m, :, :]\n            ax[m, n].plot(x_points, D, '--g', label='Pro')\n            ax[m, n].plot(x_points, T, 'b', label='Data')\n            ax[m, n].plot(x_points, L, 'k', label='Label')\n            ax[m, n].plot(x_points, DL, 'r', label='Label')\n            ax[m, n].set_ylim(-1, 1)\n    for n in range(4):\n        ax[-1, n].xaxis.set_ticks(range(0, bb, int(bb/6)))\n    fig.suptitle(epoch)\n    fig.subplots_adjust(hspace=0.15)\n    fig.savefig(\"./experiments/plots/DR_dis/\" + identifier + \"_epoch\" + str(epoch).zfill(4) + \".png\")\n    plt.clf()\n    plt.close()\n\n    return True\n\ndef detection_Comb(Label_test, L_mb, I_mb, seq_step, tao):\n    aa = Label_test.shape[0]\n    bb = Label_test.shape[1]\n\n    LL = (aa-1)*seq_step+bb\n\n    Label_test = abs(Label_test.reshape([aa, bb]))\n    L_mb = L_mb .reshape([aa, bb])\n    I_mb = I_mb .reshape([aa, bb])\n    D_L = np.zeros([LL, 1])\n    L_L = np.zeros([LL, 1])\n    Count = np.zeros([LL, 1])\n    for i in range(0, aa):\n        for j in range(0, bb):\n            # print('index:', i*10+j)\n            D_L[i*seq_step+j] += Label_test[i, j]\n            L_L[i * seq_step + j] += L_mb[i, j]\n            Count[i * seq_step + j] += 1\n\n    D_L /= Count\n    L_L /= Count\n\n    TP, TN, FP, FN = 0, 0, 0, 0\n\n    
for i in range(LL):\n        if D_L[i] > tao:\n            # true/negative\n            D_L[i] = 0\n        else:\n            # false/positive\n            D_L[i] = 1\n\n    cc = (D_L == L_L)\n    # print('D_L:', D_L)\n    # print('L_L:', L_L)\n    cc = list(cc.reshape([-1]))\n    N = cc.count(True)\n\n    print('N:', N)\n\n    Accu = float((N / LL) * 100)\n\n    precision, recall, f1, _ = precision_recall_fscore_support(L_L, D_L, average='binary')\n\n    return Accu, precision, recall, f1,\n\n\ndef detection_logits_I(DL_test, L_mb, I_mb, seq_step, tao):\n    aa = DL_test.shape[0]\n    bb = DL_test.shape[1]\n\n    LL = (aa-1)*seq_step+bb\n\n    DL_test = abs(DL_test.reshape([aa, bb]))\n    L_mb = L_mb .reshape([aa, bb])\n    I_mb = I_mb .reshape([aa, bb])\n    D_L = np.zeros([LL, 1])\n    L_L = np.zeros([LL, 1])\n    Count = np.zeros([LL, 1])\n    for i in range(0, aa):\n        for j in range(0, bb):\n            # print('index:', i*10+j)\n            D_L[i*seq_step+j] += DL_test[i, j]\n            L_L[i * seq_step + j] += L_mb[i, j]\n            Count[i * seq_step + j] += 1\n\n    D_L /= Count\n    L_L /= Count\n\n    TP, TN, FP, FN = 0, 0, 0, 0\n\n    for i in range(LL):\n        if D_L[i] > tao:\n            # true/negative\n            D_L[i] = 0\n        else:\n            # false/positive\n            D_L[i] = 1\n\n        A = D_L[i]\n        B = L_L[i]\n        if A == 1 and B == 1:\n            TP += 1\n        elif A == 1 and B == 0:\n            FP += 1\n        elif A == 0 and B == 0:\n            TN += 1\n        elif A == 0 and B == 1:\n            FN += 1\n\n\n    cc = (D_L == L_L)\n    # print('D_L:', D_L)\n    # print('L_L:', L_L)\n    cc = list(cc.reshape([-1]))\n    N = cc.count(True)\n\n    print('N:', N)\n\n    Accu = float((N / LL) * 100)\n\n    precision, recall, f1, _ = precision_recall_fscore_support(L_L, D_L, average='binary')\n\n    # true positive among all the detected positive\n    # Pre = (100 * TP) / (TP + FP + 1)\n    # # true 
positive among all the real positive\n    # Rec = (100 * TP) / (TP + FN + 1)\n    # # The F1 score is the harmonic average of the precision and recall,\n    # # where an F1 score reaches its best value at 1 (perfect precision and recall) and worst at 0.\n    # F1 = (2 * Pre * Rec) / (100 * (Pre + Rec + 1))\n    # False positive rate--false alarm rate\n    FPR = (100 * FP) / (FP + TN+1)\n\n    return Accu, precision, recall, f1, FPR, D_L\n\ndef detection_statistic_I(D_test, L_mb, I_mb, seq_step, tao):\n    # point-wise detection for one dimension\n\n    aa = D_test.shape[0]\n    bb = D_test.shape[1]\n\n    LL = (aa-1) * seq_step + bb\n    # print('aa:', aa)\n    # print('bb:', bb)\n    # print('LL:', LL)\n\n    D_test = D_test.reshape([aa, bb])\n    L_mb = L_mb.reshape([aa, bb])\n    I_mb = I_mb.reshape([aa, bb])\n    D_L = np.zeros([LL, 1])\n    L_L = np.zeros([LL, 1])\n    Count = np.zeros([LL, 1])\n    for i in range(0, aa):\n        for j in range(0, bb):\n            # print('index:', i * 10 + j)\n            D_L[i * seq_step + j] += D_test[i, j]\n            L_L[i * seq_step + j] += L_mb[i, j]\n            Count[i * seq_step + j] += 1\n\n    D_L /= Count\n    L_L /= Count\n\n    TP, TN, FP, FN = 0, 0, 0, 0\n\n    for i in range(LL):\n        if D_L[i] > tao:\n            # true/negative\n            D_L[i] = 0\n        else:\n            # false/positive\n            D_L[i] = 1\n\n        A = D_L[i]\n        B = L_L[i]\n        if A == 1 and B == 1:\n            TP += 1\n        elif A == 1 and B == 0:\n            FP += 1\n        elif A == 0 and B == 0:\n            TN += 1\n        elif A == 0 and B == 1:\n            FN += 1\n\n    cc = (D_L == L_L)\n    cc = list(cc.reshape([-1]))\n    N = cc.count(True)\n    Accu = float((N / LL) * 100)\n\n    precision, recall, f1, _ = precision_recall_fscore_support(L_L, D_L, average='binary')\n\n    # true positive among all the detected positive\n    # Pre = (100 * TP) / (TP + FP + 1)\n    # # true positive among all 
the real positive\n    # Rec = (100 * TP) / (TP + FN + 1)\n    # # The F1 score is the harmonic average of the precision and recall,\n    # # where an F1 score reaches its best value at 1 (perfect precision and recall) and worst at 0.\n    # F1 = (2 * Pre * Rec) / (100 * (Pre + Rec + 1))\n    # False positive rate--false alarm rate\n    FPR = (100 * FP) / (FP + TN)\n\n    return Accu, precision, recall, f1, FPR, D_L\n\ndef detection_D_I(DD, L_mb, I_mb, seq_step, tao):\n    # point-wise detection for one dimension\n\n    aa = DD.shape[0]\n    bb = DD.shape[1]\n\n    LL = (aa-1)*seq_step+bb\n\n    DD = abs(DD.reshape([aa, bb]))\n    L_mb = L_mb .reshape([aa, bb])\n    I_mb = I_mb .reshape([aa, bb])\n    D_L = np.zeros([LL, 1])\n    L_L = np.zeros([LL, 1])\n    Count = np.zeros([LL, 1])\n    for i in range(0, aa):\n        for j in range(0, bb):\n            # print('index:', i*10+j)\n            D_L[i*seq_step+j] += DD[i, j]\n            L_L[i * seq_step + j] += L_mb[i, j]\n            Count[i * seq_step + j] += 1\n\n    D_L /= Count\n    L_L /= Count\n\n    TP, TN, FP, FN = 0, 0, 0, 0\n\n    for i in range(LL):\n        if D_L[i] > tao:\n            # true/negative\n            D_L[i] = 0\n        else:\n            # false/positive\n            D_L[i] = 1\n\n        A = D_L[i]\n        B = L_L[i]\n        if A == 1 and B == 1:\n            TP += 1\n        elif A == 1 and B == 0:\n            FP += 1\n        elif A == 0 and B == 0:\n            TN += 1\n        elif A == 0 and B == 1:\n            FN += 1\n\n\n    cc = (D_L == L_L)\n    # print('D_L:', D_L)\n    # print('L_L:', L_L)\n    cc = list(cc.reshape([-1]))\n    N = cc.count(True)\n\n    print('N:', N)\n\n    Accu = float((N / LL) * 100)\n\n    # true positive among all the detected positive\n    Pre = (100 * TP) / (TP + FP + 1)\n    # true positive among all the real positive\n    Rec = (100 * TP) / (TP + FN + 1)\n    # The F1 score is the harmonic average of the precision and recall,\n    # where an F1 score reaches its 
best value at 1 (perfect precision and recall) and worst at 0.\n    F1 = (2 * Pre * Rec) / (100 * (Pre + Rec + 1))\n    # False positive rate--false alarm rate\n    FPR = (100 * FP) / (FP + TN+1)\n\n    return Accu, Pre, Rec, F1, FPR, D_L\n\ndef detection_R_D_I(DD, Gs, T_mb, L_mb, seq_step, tao, lam):\n    # point-wise detection for one dimension\n    # (1-lambda)*R(x)+lambda*D(x)\n    # lambda=0.5?\n    # D_test, Gs, T_mb, L_mb  are of same size\n\n    R = np.absolute(Gs - T_mb)\n    R = np.mean(R, axis=2)\n    aa = DD.shape[0]\n    bb = DD.shape[1]\n\n    LL = (aa - 1) * seq_step + bb\n\n    DD = abs(DD.reshape([aa, bb]))\n    DD = 1-DD\n    L_mb = L_mb.reshape([aa, bb])\n    R = R.reshape([aa, bb])\n\n    D_L = np.zeros([LL, 1])\n    R_L = np.zeros([LL, 1])\n    L_L = np.zeros([LL, 1])\n    L_pre = np.zeros([LL, 1])\n    Count = np.zeros([LL, 1])\n    for i in range(0, aa):\n        for j in range(0, bb):\n            # print('index:', i*10+j)\n            D_L[i * seq_step + j] += DD[i, j]\n            L_L[i * seq_step + j] += L_mb[i, j]\n            R_L[i * seq_step + j] += R[i, j]\n            Count[i * seq_step + j] += 1\n    D_L /= Count\n    L_L /= Count\n    R_L /= Count\n\n    TP, TN, FP, FN = 0, 0, 0, 0\n\n    for i in range(LL):\n        if (1-lam)*R_L[i] + lam*D_L[i] > tao:\n            # false\n            L_pre[i] = 1\n        else:\n            # true\n            L_pre[i] = 0\n\n        A = L_pre[i]\n        # print('A:', A)\n        B = L_L[i]\n        # print('B:', B)\n        if A == 1 and B == 1:\n            TP += 1\n        elif A == 1 and B == 0:\n            FP += 1\n        elif A == 0 and B == 0:\n            TN += 1\n        elif A == 0 and B == 1:\n            FN += 1\n\n    cc = (L_pre == L_L)\n    cc = list(cc.reshape([-1]))\n    N = cc.count(True)\n    Accu = float((N / LL) * 100)\n\n    # true positive among all the detected positive\n    Pre = (100 * TP) / (TP + FP + 1)\n    # true positive among all the real positive\n    Rec = (100 * TP) / (TP + FN 
+ 1)\n    # The F1 score is the harmonic average of the precision and recall,\n    # where an F1 score reaches its best value at 1 (perfect precision and recall) and worst at 0.\n    F1 = (2 * Pre * Rec) / (100 * (Pre + Rec + 1))\n    # False positive rate\n    FPR = (100 * FP) / (FP + TN+1)\n\n    return Accu, Pre, Rec, F1, FPR, L_pre\n\ndef detection_R_I(Gs, T_mb, L_mb, seq_step, tao):\n    # point-wise detection for one dimension\n    # (1-lambda)*R(x)+lambda*D(x)\n    # lambda=0.5?\n    # D_test, Gs, T_mb, L_mb  are of same size\n\n    R = np.absolute(Gs - T_mb)\n    R = np.mean(R, axis=2)\n    aa = R.shape[0]\n    bb = R.shape[1]\n\n    LL = (aa - 1) * seq_step + bb\n\n    L_mb = L_mb.reshape([aa, bb])\n    R = R.reshape([aa, bb])\n\n    L_L = np.zeros([LL, 1])\n    R_L = np.zeros([LL, 1])\n    L_pre = np.zeros([LL, 1])\n    Count = np.zeros([LL, 1])\n    for i in range(0, aa):\n        for j in range(0, bb):\n            # print('index:', i*10+j)\n            L_L[i * seq_step + j] += L_mb[i, j]\n            R_L[i * seq_step + j] += R[i, j]\n            Count[i * seq_step + j] += 1\n    L_L /= Count\n    R_L /= Count\n\n    TP, TN, FP, FN = 0, 0, 0, 0\n\n    for i in range(LL):\n        if R_L[i] > tao:\n            # false\n            L_pre[i] = 1\n        else:\n            # true\n            L_pre[i] = 0\n\n        A = L_pre[i]\n        B = L_L[i]\n        if A == 1 and B == 1:\n            TP += 1\n        elif A == 1 and B == 0:\n            FP += 1\n        elif A == 0 and B == 0:\n            TN += 1\n        elif A == 0 and B == 1:\n            FN += 1\n\n    cc = (L_pre == L_L)\n    cc = list(cc.reshape([-1]))\n    N = cc.count(True)\n    Accu = float((N / LL) * 100)\n\n    # true positive among all the detected positive\n    Pre = (100 * TP) / (TP + FP + 1)\n    # true positive among all the real positive\n    Rec = (100 * TP) / (TP + FN + 1)\n    # The F1 score is the harmonic average of the precision and recall,\n    # where an F1 score reaches its best value 
at 1 (perfect precision and recall) and worst at 0.\n    F1 = (2 * Pre * Rec) / (100 * (Pre + Rec + 1))\n    # False positive rate\n    FPR = (100 * FP) / (FP + TN+1)\n\n    return Accu, Pre, Rec, F1, FPR, L_pre\n\n\ndef sample_detection(D_test, L_mb, tao):\n    # sample-wise detection for one dimension\n\n    aa = D_test.shape[0]\n    bb = D_test.shape[1]\n\n    D_test = D_test.reshape([aa, bb])\n    L_mb = L_mb.reshape([aa, bb])\n    L = np.sum(L_mb, 1)\n    # NN = 0-10\n    L[L > 0] = 1\n\n    D_L = np.empty([aa, ])\n\n    for i in range(aa):\n        if np.mean(D_test[i, :]) > tao:\n            # true/negative\n            D_L[i] = 0\n        else:\n            # false/positive\n            D_L[i] = 1\n\n    cc = (D_L == L)\n    # cc = list(cc)\n    N = list(cc).count(True)\n    Accu = float((N / (aa)) * 100)\n\n    precision, recall, f1, _ = precision_recall_fscore_support(L, D_L, average='binary')\n\n    return Accu, precision, recall, f1\n\n\ndef CUSUM_det(spe_n, spe_a, labels):\n\n    mu = np.mean(spe_n)\n    sigma = np.std(spe_n)\n\n    kk = 3*sigma\n    H = 15*sigma\n    print('H:', H)\n\n    tar = np.mean(spe_a)\n\n    mm = spe_a.shape[0]\n\n    SH = np.empty([mm, ])\n    SL = np.empty([mm, ])\n\n    for i in range(mm):\n        SH[-1] = 0\n        SL[-1] = 0\n        SH[i] = max(0, SH[i-1]+spe_a[i]-(tar+kk))\n        SL[i] = min(0, SL[i-1]+spe_a[i]-(tar-kk))\n\n\n    count = np.empty([mm, ])\n    TP = 0\n    TN = 0\n    FP = 0\n    FN = 0\n    for i in range(mm):\n        A = SH[i]\n        B = SL[i]\n        AA = H\n        BB = -H\n        if A <= AA and B >= BB:\n            count[i] = 0\n        else:\n            count[i] = 1\n\n        C = count[i]\n        D = labels[i]\n        if C == 1 and D == 1:\n            TP += 1\n        elif C == 1 and D == 0:\n            FP += 1\n        elif C == 0 and D == 0:\n            TN += 1\n        elif C == 0 and D == 1:\n            FN += 1\n\n    cc = (count == labels)\n    # cc = list(cc)\n    N = 
list(cc).count(True)\n    Accu = float((N / (mm)) * 100)\n\n    # true positive among all the detected positive\n    Pre = (100 * TP) / (TP + FP + 1)\n    # true positive among all the real positive\n    Rec = (100 * TP) / (TP + FN)\n    # The F1 score is the harmonic average of the precision and recall,\n    # where an F1 score reaches its best value at 1 (perfect precision and recall) and worst at 0.\n    F1 = (2 * Pre * Rec) / (100 * (Pre + Rec + 1))\n    # False positive rate\n    FPR = (100 * FP) / (FP + TN)\n\n    return Accu, Pre, Rec, F1, FPR\n\n\ndef SPE(X, pc):\n    a = X.shape[0]\n    b = X.shape[1]\n\n    spe = np.empty([a])\n    # Square Prediction Error (square of residual distance)\n    #  spe = X'(I-PP')X\n    I = np.identity(b, float) - np.matmul(pc.transpose(1, 0), pc)\n    # I = np.matmul(I, I)\n    for i in range(a):\n        x = X[i, :].reshape([b, 1])\n        y = np.matmul(x.transpose(1, 0), I)\n        spe[i] = np.matmul(y, x)\n\n    return spe\n\n\n\ndef generator_o(z, hidden_units_g, seq_length, batch_size, num_generated_features, reuse=False, parameters=None, learn_scale=True):\n    \"\"\"\n    If parameters are supplied, initialise as such\n    \"\"\"\n    # It is important to specify different variable scopes for the LSTM cells.\n    with tf.variable_scope(\"generator_o\") as scope:\n\n        W_out_G_initializer = tf.constant_initializer(value=parameters['generator/W_out_G:0'])\n        b_out_G_initializer = tf.constant_initializer(value=parameters['generator/b_out_G:0'])\n        try:\n            scale_out_G_initializer = tf.constant_initializer(value=parameters['generator/scale_out_G:0'])\n        except KeyError:\n            scale_out_G_initializer = tf.constant_initializer(value=1)\n            assert learn_scale\n        lstm_initializer = tf.constant_initializer(value=parameters['generator/rnn/lstm_cell/weights:0'])\n        bias_start = parameters['generator/rnn/lstm_cell/biases:0']\n\n        W_out_G = 
tf.get_variable(name='W_out_G', shape=[hidden_units_g, num_generated_features], initializer=W_out_G_initializer)\n        b_out_G = tf.get_variable(name='b_out_G', shape=num_generated_features, initializer=b_out_G_initializer)\n        scale_out_G = tf.get_variable(name='scale_out_G', shape=1, initializer=scale_out_G_initializer, trainable=False)\n\n        inputs = z\n\n        cell = LSTMCell(num_units=hidden_units_g,\n                        state_is_tuple=True,\n                        initializer=lstm_initializer,\n                        bias_start=bias_start,\n                        reuse=reuse)\n        rnn_outputs, rnn_states = tf.nn.dynamic_rnn(\n            cell=cell,\n            dtype=tf.float32,\n            sequence_length=[seq_length] * batch_size,\n            inputs=inputs)\n        rnn_outputs_2d = tf.reshape(rnn_outputs, [-1, hidden_units_g])\n        logits_2d = tf.matmul(rnn_outputs_2d, W_out_G) + b_out_G #out put weighted sum\n        output_2d = tf.nn.tanh(logits_2d) # logits operation [-1, 1]\n        output_3d = tf.reshape(output_2d, [-1, seq_length, num_generated_features])\n    return output_3d\n\n\ndef discriminator_o(x, hidden_units_d, reuse=False, parameters=None):\n\n    with tf.variable_scope(\"discriminator_0\") as scope:\n\n        W_out_D_initializer = tf.constant_initializer(value=parameters['discriminator/W_out_D:0'])\n        b_out_D_initializer = tf.constant_initializer(value=parameters['discriminator/b_out_D:0'])\n\n        W_out_D = tf.get_variable(name='W_out_D', shape=[hidden_units_d, 1],  initializer=W_out_D_initializer)\n        b_out_D = tf.get_variable(name='b_out_D', shape=1, initializer=b_out_D_initializer)\n\n\n        inputs = x\n\n        cell = tf.contrib.rnn.LSTMCell(num_units=hidden_units_d, state_is_tuple=True, reuse=reuse)\n\n        rnn_outputs, rnn_states = tf.nn.dynamic_rnn(cell=cell, dtype=tf.float32, inputs=inputs)\n\n\n        logits = tf.einsum('ijk,km', rnn_outputs, W_out_D) + b_out_D # output 
weighted sum\n\n        output = tf.nn.sigmoid(logits) # y = 1 / (1 + exp(-x)). output activation [0, 1]. Probability??\n        # sigmoid output ([0,1]), Probability?\n\n    return output, logits\n\n\ndef invert(settings, samples, para_path, g_tolerance=None, e_tolerance=0.1,\n           n_iter=None, max_iter=10000, heuristic_sigma=None):\n    \"\"\"\n    Return the latent space points corresponding to a set of a samples\n    ( from gradient descent )\n    Note: this function is designed for ONE sample generation\n    \"\"\"\n    # num_samples = samples.shape[0]\n    # cast samples to float32\n\n    samples = np.float32(samples)\n\n    # get the model\n    # if settings is a string, assume it's an identifier and load\n    if type(settings) == str:\n        settings = json.load(open('./experiments/settings/' + settings + '.txt', 'r'))\n\n\n\n    # print('Inverting', 1, 'samples using model', settings['identifier'], 'at epoch', epoch,)\n    # if not g_tolerance is None:\n    #     print('until gradient norm is below', g_tolerance)\n    # else:\n    #     print('until error is below', e_tolerance)\n\n\n    # get parameters\n    parameters = model.load_parameters(para_path)\n    # # assertions\n    # assert samples.shape[2] == settings['num_generated_features']\n    # create VARIABLE Z\n    Z = tf.get_variable(name='Z', shape=[1, settings['seq_length'],\n                                         settings['latent_dim']],\n                        initializer=tf.random_normal_initializer())\n    # create outputs\n\n    G_samples = generator_o(Z, settings['hidden_units_g'], settings['seq_length'],\n                          1, settings['num_generated_features'],\n                          reuse=False, parameters=parameters)\n    # generator_vars = ['hidden_units_g', 'seq_length', 'batch_size', 'num_generated_features', 'cond_dim', 'learn_scale']\n    # generator_settings = dict((k, settings[k]) for k in generator_vars)\n    # G_samples = model.generator(Z, 
**generator_settings, reuse=True)\n\n    fd = None\n\n    # define loss mmd-based loss\n    if heuristic_sigma is None:\n        heuristic_sigma = mmd.median_pairwise_distance_o(samples)  # this is noisy\n        print('heuristic_sigma:', heuristic_sigma)\n    samples = tf.reshape(samples, [1, settings['seq_length'], settings['num_generated_features']])\n    Kxx, Kxy, Kyy, wts = mmd._mix_rbf_kernel(G_samples, samples, sigmas=tf.constant(value=heuristic_sigma, shape=(1, 1)))\n    similarity_per_sample = tf.diag_part(Kxy)\n    reconstruction_error_per_sample = 1 - similarity_per_sample\n    # reconstruction_error_per_sample = tf.reduce_sum((tf.nn.l2_normalize(G_samples, dim=1) - tf.nn.l2_normalize(samples, dim=1))**2, axis=[1,2])\n    similarity = tf.reduce_mean(similarity_per_sample)\n    reconstruction_error = 1 - similarity\n    # updater\n    #    solver = tf.train.AdamOptimizer().minimize(reconstruction_error_per_sample, var_list=[Z])\n    # solver = tf.train.RMSPropOptimizer(learning_rate=500).minimize(reconstruction_error, var_list=[Z])\n    solver = tf.train.RMSPropOptimizer(learning_rate=0.1).minimize(reconstruction_error_per_sample, var_list=[Z])\n    # solver = tf.train.MomentumOptimizer(learning_rate=0.1, momentum=0.9).minimize(reconstruction_error_per_sample, var_list=[Z])\n\n    grad_Z = tf.gradients(reconstruction_error_per_sample, Z)[0]\n    grad_per_Z = tf.norm(grad_Z, axis=(1, 2))\n    grad_norm = tf.reduce_mean(grad_per_Z)\n    # solver = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(reconstruction_error, var_list=[Z])\n    print('Finding latent state corresponding to samples...')\n\n    sess = tf.Session()\n    sess.run(tf.global_variables_initializer())\n    with tf.Session() as sess:\n        sess.run(tf.global_variables_initializer())\n        error = sess.run(reconstruction_error, feed_dict=fd)\n        g_n = sess.run(grad_norm, feed_dict=fd)\n        # print(g_n)\n        i = 0\n        if not n_iter is None:\n            
while i < n_iter:\n                _ = sess.run(solver, feed_dict=fd)\n                error = sess.run(reconstruction_error, feed_dict=fd)\n                i += 1\n        else:\n            if not g_tolerance is None:\n                while g_n > g_tolerance:\n                    _ = sess.run(solver, feed_dict=fd)\n                    error, g_n = sess.run([reconstruction_error, grad_norm], feed_dict=fd)\n                    i += 1\n                    print(error, g_n)\n                    if i > max_iter:\n                        break\n            else:\n                while np.abs(error) > e_tolerance:\n                    _ = sess.run(solver, feed_dict=fd)\n                    error = sess.run(reconstruction_error, feed_dict=fd)\n                    i += 1\n                    # print(error)\n                    if i > max_iter:\n                        break\n        Zs = sess.run(Z, feed_dict=fd)\n        Gs = sess.run(G_samples, feed_dict={Z: Zs})\n        error_per_sample = sess.run(reconstruction_error_per_sample, feed_dict=fd)\n        print('Z found in', i, 'iterations with final reconstruction error of', error)\n    tf.reset_default_graph()\n\n    return Gs, Zs, error_per_sample, heuristic_sigma\n\n\ndef dis_trained_model(settings, samples, para_path):\n    \"\"\"\n    Return the discrimination results of  num_samples testing samples from a trained model described by settings dict\n    Note: this function is designed for ONE sample discrimination\n    \"\"\"\n\n    # if settings is a string, assume it's an identifier and load\n    if type(settings) == str:\n        settings = json.load(open('./experiments/settings/' + settings + '.txt', 'r'))\n\n    num_samples = samples.shape[0]\n    samples = np.float32(samples)\n    num_variables = samples.shape[2]\n    # samples = np.reshape(samples, [1, settings['seq_length'], settings['num_generated_features']])\n\n    # get the parameters, get other variables\n    # parameters = 
model.load_parameters(settings['sub_id'] + '_' + str(settings['seq_length']) + '_' + str(epoch))\n    parameters = model.load_parameters(para_path)\n    # settings['sub_id'] + '_' + str(settings['seq_length']) + '_' + str(epoch)\n\n    # create placeholder, T samples\n    # T = tf.placeholder(tf.float32, [settings['batch_size'], settings['seq_length'], settings['num_generated_features']])\n\n    T = tf.placeholder(tf.float32, [num_samples, settings['seq_length'], num_variables])\n\n    # create the discriminator (GAN)\n    # normal GAN\n    D_t, L_t = discriminator_o(T, settings['hidden_units_d'], reuse=False, parameters=parameters)\n\n    config = tf.ConfigProto()\n    config.gpu_options.allow_growth = True\n    # with tf.device('/gpu:1'):\n    with tf.Session(config=config) as sess:\n        sess.run(tf.global_variables_initializer())\n        D_t, L_t = sess.run([D_t, L_t], feed_dict={T: samples})\n\n    tf.reset_default_graph()\n    return D_t, L_t\n\ndef dis_D_model(settings, samples, para_path):\n    \"\"\"\n    Return the discrimination results of  num_samples testing samples from a trained model described by settings dict\n    Note: this function is designed for ONE sample discrimination\n    \"\"\"\n\n    # if settings is a string, assume it's an identifier and load\n    if type(settings) == str:\n        settings = json.load(open('./experiments/settings/' + settings + '.txt', 'r'))\n\n    # num_samples = samples.shape[0]\n    samples = np.float32(samples)\n    samples = np.reshape(samples, [1, settings['seq_length'], settings['num_generated_features']])\n\n    # get the parameters, get other variables\n    parameters = model.load_parameters(para_path)\n    # create placeholder, T samples\n\n    T = tf.placeholder(tf.float32, [1, settings['seq_length'], settings['num_generated_features']])\n\n    # create the discriminator (GAN or CGAN)\n    # normal GAN\n    D_t, L_t = discriminator_o(T, settings['hidden_units_d'], reuse=False, parameters=parameters)\n    
# D_t, L_t = model.discriminator(T, settings['hidden_units_d'], settings['seq_length'], num_samples, reuse=False,\n    #               parameters=parameters, cond_dim=0, c=None, batch_mean=False)\n\n    config = tf.ConfigProto()\n    config.gpu_options.allow_growth = True\n    with tf.Session() as sess:\n        sess.run(tf.global_variables_initializer())\n        D_t, L_t = sess.run([D_t, L_t], feed_dict={T: samples})\n\n    tf.reset_default_graph()\n    return D_t, L_t"
  },
  {
    "path": "README.md",
    "content": "# -- Multivariate Anomaly Detection for Time Series Data with GANs -- #\n\n# MAD-GAN\n\nThis repository contains code for the paper, _[MAD-GAN: Multivariate Anomaly Detection for Time Series Data with Generative Adversarial Networks](https://arxiv.org/pdf/1901.04997.pdf)_, by Dan Li, Dacheng Chen, Jonathan Goh, and See-Kiong Ng.\n\nMAD-GAN is a refined version of GAN-AD at _[Anomaly Detection with Generative Adversarial Networks for Multivariate Time Series](https://arxiv.org/pdf/1809.04758.pdf)_. The code can be found at https://github.com/LiDan456/GAN-AD\n\n(We are still working on this topic, will upload the completed version later...)\n\n## Overview\n\nWe used generative adversarial networks (GANs) to do anomaly detection for time series data.\nThe GAN framework was **R**GAN, which was taken from the paper, _[Real-valued (Medical) Time Series Generation with Recurrent Conditional GANs](https://arxiv.org/abs/1706.02633)_.\nPlease refer to https://github.com/ratschlab/RGAN for the original code.\n\n## Quickstart\n\n- Python3\n\n- Please unpack the data.7z file in the data folder before running RGAN.py and AD.py\n\n- To train the model:\n  \n  \"\"\" python RGAN.py --settings_file kdd99 \"\"\"\n\n- To do anomaly detection:\n\n  \"\"\" python AD.py --settings_file kdd99_test\"\"\"\n  \n  \"\"\" python AD_Invert.py --settings_file kdd99_test\"\"\"\n\n## Data\n\nWe apply our method on the SWaT and WADI datasets in the paper, however, we didn't upload the data in this repository. Please refer to https://itrust.sutd.edu.sg/ and send a request to iTrust if you want to try the data.\n\nIn this repository we used the KDD Cup 1999 dataset as an example (please unpack the data.7z file in the data folder before running RGAN.py and AD.py). You can also download the original data at http://kdd.ics.uci.edu/databases/kddcup99/kddcup99.html\n\n"
  },
  {
    "path": "RGAN.py",
    "content": "import numpy as np\r\nimport tensorflow as tf\r\nimport pdb\r\nimport random\r\nimport json\r\nfrom scipy.stats import mode\r\n\r\nimport data_utils\r\nimport plotting\r\nimport model\r\nimport utils\r\nimport eval\r\nimport DR_discriminator\r\n\r\nfrom time import time\r\nfrom math import floor\r\nfrom mmd import rbf_mmd2, median_pairwise_distance, mix_rbf_mmd2_and_ratio\r\n\r\nbegin = time()\r\n\r\ntf.logging.set_verbosity(tf.logging.ERROR)\r\n\r\n# --- get settings --- #\r\n# parse command line arguments, or use defaults\r\nparser = utils.rgan_options_parser()\r\nsettings = vars(parser.parse_args())\r\n# if a settings file is specified, it overrides command line arguments/defaults\r\nif settings['settings_file']: settings = utils.load_settings_from_file(settings)\r\n\r\n# --- get data, split --- #\r\n# samples, pdf, labels = data_utils.get_data(settings)\r\ndata_path = './experiments/data/' + settings['data_load_from'] + '.data.npy'\r\nprint('Loading data from', data_path)\r\nsettings[\"eval_an\"] = False\r\nsettings[\"eval_single\"] = False\r\nsamples, labels, index = data_utils.get_data(settings[\"data\"], settings[\"seq_length\"], settings[\"seq_step\"],\r\n                                             settings[\"num_signals\"], settings['sub_id'], settings[\"eval_single\"],\r\n                                             settings[\"eval_an\"], data_path)\r\nprint('samples_size:',samples.shape)\r\n# -- number of variables -- #\r\nnum_variables = samples.shape[2]\r\nprint('num_variables:', num_variables)\r\n# --- save settings, data --- #\r\nprint('Ready to run with settings:')\r\nfor (k, v) in settings.items(): print(v, '\\t', k)\r\n# add the settings to local environment\r\n# WARNING: at this point a lot of variables appear\r\nlocals().update(settings)\r\njson.dump(settings, open('./experiments/settings/' + identifier + '.txt', 'w'), indent=0)\r\n\r\n# --- build model --- #\r\n# preparation: data placeholders and model parameters\r\nZ, X, T = 
model.create_placeholders(batch_size, seq_length, latent_dim, num_variables)\r\ndiscriminator_vars = ['hidden_units_d', 'seq_length', 'batch_size', 'batch_mean']\r\ndiscriminator_settings = dict((k, settings[k]) for k in discriminator_vars)\r\ngenerator_vars = ['hidden_units_g', 'seq_length', 'batch_size', 'learn_scale']\r\ngenerator_settings = dict((k, settings[k]) for k in generator_vars)\r\ngenerator_settings['num_signals'] = num_variables\r\n\r\n# model: GAN losses\r\nD_loss, G_loss = model.GAN_loss(Z, X, generator_settings, discriminator_settings)\r\nD_solver, G_solver, priv_accountant = model.GAN_solvers(D_loss, G_loss, learning_rate, batch_size,\r\n                                                        total_examples=samples.shape[0],\r\n                                                        l2norm_bound=l2norm_bound,\r\n                                                        batches_per_lot=batches_per_lot, sigma=dp_sigma, dp=dp)\r\n# model: generate samples for visualization\r\nG_sample = model.generator(Z, **generator_settings, reuse=True)\r\n\r\n\r\n# # --- evaluation settings--- #\r\n#\r\n# # frequency to do visualisations\r\n# num_samples = samples.shape[0]\r\n# vis_freq = max(6600 // num_samples, 1)\r\n# eval_freq = max(6600// num_samples, 1)\r\n#\r\n# # get heuristic bandwidth for mmd kernel from evaluation samples\r\n# heuristic_sigma_training = median_pairwise_distance(samples)\r\n# best_mmd2_so_far = 1000\r\n#\r\n# # optimise sigma using that (that's t-hat)\r\n# batch_multiplier = 5000 // batch_size\r\n# eval_size = batch_multiplier * batch_size\r\n# eval_eval_size = int(0.2 * eval_size)\r\n# eval_real_PH = tf.placeholder(tf.float32, [eval_eval_size, seq_length, num_generated_features])\r\n# eval_sample_PH = tf.placeholder(tf.float32, [eval_eval_size, seq_length, num_generated_features])\r\n# n_sigmas = 2\r\n# sigma = tf.get_variable(name='sigma', shape=n_sigmas, initializer=tf.constant_initializer(\r\n#     
value=np.power(heuristic_sigma_training, np.linspace(-1, 3, num=n_sigmas))))\r\n# mmd2, that = mix_rbf_mmd2_and_ratio(eval_real_PH, eval_sample_PH, sigma)\r\n# with tf.variable_scope(\"SIGMA_optimizer\"):\r\n#     sigma_solver = tf.train.RMSPropOptimizer(learning_rate=0.05).minimize(-that, var_list=[sigma])\r\n#     # sigma_solver = tf.train.AdamOptimizer().minimize(-that, var_list=[sigma])\r\n#     # sigma_solver = tf.train.AdagradOptimizer(learning_rate=0.1).minimize(-that, var_list=[sigma])\r\n# sigma_opt_iter = 2000\r\n# sigma_opt_thresh = 0.001\r\n# sigma_opt_vars = [var for var in tf.global_variables() if 'SIGMA_optimizer' in var.name]\r\n\r\n\r\n# --- run the program --- #\r\nconfig = tf.ConfigProto()\r\nconfig.gpu_options.allow_growth = True\r\nsess = tf.Session(config=config)\r\n# sess = tf.Session()\r\nsess.run(tf.global_variables_initializer())\r\n\r\n# # -- plot the real samples -- #\r\nvis_real_indices = np.random.choice(len(samples), size=16)\r\nvis_real = np.float32(samples[vis_real_indices, :, :])\r\nplotting.save_plot_sample(vis_real, 0, identifier + '_real', n_samples=16, num_epochs=num_epochs)\r\nplotting.save_samples_real(vis_real, identifier)\r\n\r\n# --- train --- #\r\ntrain_vars = ['batch_size', 'D_rounds', 'G_rounds', 'use_time', 'seq_length', 'latent_dim']\r\ntrain_settings = dict((k, settings[k]) for k in train_vars)\r\ntrain_settings['num_signals'] = num_variables\r\n\r\nt0 = time()\r\nMMD = np.zeros([num_epochs, ])\r\n\r\nfor epoch in range(num_epochs):\r\n# for epoch in range(1):\r\n    # -- train epoch -- #\r\n    D_loss_curr, G_loss_curr = model.train_epoch(epoch, samples, labels, sess, Z, X, D_loss, G_loss,\r\n                                                 D_solver, G_solver, **train_settings)\r\n\r\n    # # -- eval -- #\r\n    # # visualise plots of generated samples, with/without labels\r\n    # # choose which epoch to visualize\r\n    #\r\n    # # random input vectors for the latent space, as the inputs of generator\r\n    # 
vis_ZZ = model.sample_Z(batch_size, seq_length, latent_dim, use_time)\r\n    #\r\n    # # # -- generate samples-- #\r\n    # vis_sample = sess.run(G_sample, feed_dict={Z: vis_ZZ})\r\n    # # # -- visualize the generated samples -- #\r\n    # plotting.save_plot_sample(vis_sample, epoch, identifier, n_samples=16, num_epochs=None, ncol=4)\r\n    # # plotting.save_plot_sample(vis_sample, 0, identifier + '_real', n_samples=16, num_epochs=num_epochs)\r\n    # # # save the generated samples in cased they might be useful for comparison\r\n    # plotting.save_samples(vis_sample, identifier, epoch)\r\n\r\n    # -- print -- #\r\n    print('epoch, D_loss_curr, G_loss_curr, seq_length')\r\n    print('%d\\t%.4f\\t%.4f\\t%d' % (epoch, D_loss_curr, G_loss_curr, seq_length))\r\n\r\n    # # -- compute mmd2 and if available, prob density -- #\r\n    # if epoch % eval_freq == 0:\r\n    #     # how many samples to evaluate with?\r\n    #     eval_Z = model.sample_Z(eval_size, seq_length, latent_dim, use_time)\r\n    #     eval_sample = np.empty(shape=(eval_size, seq_length, num_signals))\r\n    #     for i in range(batch_multiplier):\r\n    #         eval_sample[i * batch_size:(i + 1) * batch_size, :, :] = sess.run(G_sample, feed_dict={ Z: eval_Z[i * batch_size:(i + 1) * batch_size]})\r\n    #     eval_sample = np.float32(eval_sample)\r\n    #     eval_real = np.float32(samples['vali'][np.random.choice(len(samples['vali']), size=batch_multiplier * batch_size), :, :])\r\n    #\r\n    #     eval_eval_real = eval_real[:eval_eval_size]\r\n    #     eval_test_real = eval_real[eval_eval_size:]\r\n    #     eval_eval_sample = eval_sample[:eval_eval_size]\r\n    #     eval_test_sample = eval_sample[eval_eval_size:]\r\n    #\r\n    #     # MMD\r\n    #     # reset ADAM variables\r\n    #     sess.run(tf.initialize_variables(sigma_opt_vars))\r\n    #     sigma_iter = 0\r\n    #     that_change = sigma_opt_thresh * 2\r\n    #     old_that = 0\r\n    #     while that_change > sigma_opt_thresh and 
sigma_iter < sigma_opt_iter:\r\n    #         new_sigma, that_np, _ = sess.run([sigma, that, sigma_solver],\r\n    #                                          feed_dict={eval_real_PH: eval_eval_real, eval_sample_PH: eval_eval_sample})\r\n    #         that_change = np.abs(that_np - old_that)\r\n    #         old_that = that_np\r\n    #         sigma_iter += 1\r\n    #     opt_sigma = sess.run(sigma)\r\n    #     try:\r\n    #         mmd2, that_np = sess.run(mix_rbf_mmd2_and_ratio(eval_test_real, eval_test_sample, biased=False, sigmas=sigma))\r\n    #     except ValueError:\r\n    #         mmd2 = 'NA'\r\n    #         that = 'NA'\r\n    #\r\n    #     MMD[epoch, ] = mmd2\r\n\r\n    # -- save model parameters -- #\r\n    model.dump_parameters(sub_id + '_' + str(seq_length) + '_' + str(epoch), sess)\r\n\r\nnp.save('./experiments/plots/gs/' + identifier + '_' + 'MMD.npy', MMD)\r\n\r\nend = time() - begin\r\nprint('Training terminated | Training time=%d s' %(end) )\r\n\r\nprint(\"Training terminated | training time = %ds  \" % (time() - begin))"
  },
  {
    "path": "data_utils.py",
    "content": "import numpy as np\nimport pandas as pd\nimport pdb\nimport re\nfrom time import time\nimport json\nimport random\n\nimport model\n\nfrom scipy.spatial.distance import pdist, squareform\nfrom scipy.stats import multivariate_normal, invgamma, mode\nfrom scipy.special import gamma\nfrom scipy.misc.pilutil import imresize\nfrom functools import partial\nfrom math import ceil\n\nfrom sklearn.metrics.pairwise import rbf_kernel\nfrom sklearn.preprocessing import MinMaxScaler\nfrom sklearn import preprocessing\n\n\n# --- deal with the SWaT data --- #\ndef swat(seq_length, seq_step, num_signals, randomize=False):\n    \"\"\" Load and serialise \"\"\"\n    # train = np.load('./data/swat.npy')\n    # print('Loaded swat from .npy')\n    train = np.loadtxt(open('./data/swat.csv'), delimiter=',')\n    print('Loaded swat from .csv')\n    m, n = train.shape # m=496800, n=52\n    for i in range(n - 1):\n        A = max(train[:, i])\n        if A != 0:\n            train[:, i] /= max(train[:, i])\n            # scale from -1 to 1\n            train[:, i] = 2 * train[:, i] - 1\n        else:\n            train[:, i] = train[:, i]\n\n    samples = train[21600:, 0:n-1]\n    labels = train[21600:, n-1]    # the last colummn is label\n    #############################\n    # -- choose variable for uni-variate GAN-AD -- #\n    # samples = samples[:, [1, 8, 18, 28]]\n    ############################\n    # -- apply PCA dimension reduction for multi-variate GAN-AD -- #\n    from sklearn.decomposition import PCA\n    # ALL SENSORS IDX\n    # XS = [0, 1, 5, 6, 7, 8, 16, 17, 18, 25, 26, 27, 28, 33, 34, 35, 36, 37, 38, 39, 40, 41, 44, 45, 46, 47]\n    # X_n = samples[:, XS]\n    # X_a = samples_a[:, XS]\n    # All VARIABLES\n    X_n = samples\n    ####################################\n    ###################################\n    # -- the best PC dimension is chosen pc=5 -- #\n    n_components = num_signals\n    pca = PCA(n_components, svd_solver='full')\n    pca.fit(X_n)\n    
ex_var = pca.explained_variance_ratio_\n    pc = pca.components_\n\n    # projected values on the principal component\n    T_n = np.matmul(X_n, pc.transpose(1, 0))\n    samples = T_n\n\n    # # only for one-dimensional\n    # samples = T_n.reshape([samples.shape[0], ])\n    ###########################################\n    ###########################################\n    # seq_length = 7200\n    num_samples = (samples.shape[0]-seq_length)//seq_step\n    print(\"num_samples:\", num_samples)\n    print(\"num_signals:\", num_signals)\n    aa = np.empty([num_samples, seq_length, num_signals])\n    bb = np.empty([num_samples, seq_length, 1])\n\n    for j in range(num_samples):\n       bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1,1])\n       for i in range(num_signals):\n           aa[j, :, i] = samples[(j * seq_step):(j*seq_step + seq_length), i]\n\n    # samples = aa[:, 0:7200:200, :]\n    # labels = bb[:, 0:7200:200, :]\n    samples = aa\n    labels = bb\n\n    return samples, labels\n\ndef swat_birgan(seq_length, seq_step, num_signals, randomize=False):\n    \"\"\" Load and serialise \"\"\"\n    # train = np.load('./data/swat.npy')\n    # print('Loaded swat from .npy')\n    train = np.loadtxt(open('./data/swat.csv'), delimiter=',')\n    print('Loaded swat from .csv')\n    m, n = train.shape # m=496800, n=52\n    for i in range(n - 1):\n        A = max(train[:, i])\n        if A != 0:\n            train[:, i] /= max(train[:, i])\n            # scale from -1 to 1\n            train[:, i] = 2 * train[:, i] - 1\n        else:\n            train[:, i] = train[:, i]\n\n    samples = train[21600:, 0:n-1]\n    labels = train[21600:, n-1]    # the last colummn is label\n    #############################\n    # # -- choose variable for uni-variate GAN-AD -- #\n    # # samples = samples[:, [1, 8, 18, 28]]\n    ###########################################\n    ###########################################\n    nn = samples.shape[1]\n    
num_samples = (samples.shape[0]-seq_length)//seq_step\n    aa = np.empty([num_samples, nn, nn])\n    AA = np.empty([seq_length, nn])\n    bb = np.empty([num_samples, seq_length, 1])\n\n    print('Pre-process training data...')\n    for j in range(num_samples):\n       # display batch progress\n       model_bigan.display_batch_progression(j, num_samples)\n       bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1,1])\n       for i in range(nn):\n           AA[:, i] = samples[(j * seq_step):(j * seq_step + seq_length), i]\n       aa[j, :, :] = np.cov(AA.T)\n\n    samples = aa\n    labels = bb\n\n    return samples, labels\n\ndef swat_test(seq_length, seq_step, num_signals, randomize=False):\n    \"\"\" Load and serialise \"\"\"\n    # test = np.load('./data/swat_a.npy')\n    # print('Loaded swat_a from .npy')\n    test = np.loadtxt(open('./data/swat_a.csv'), delimiter=',')\n    print('Loaded swat_a from .csv')\n    m, n = test.shape  # m1=449919, n1=52\n    for i in range(n - 1):\n        B = max(test[:, i])\n        if B != 0:\n            test[:, i] /= max(test[:, i])\n            # scale from -1 to 1\n            test[:, i] = 2 * test[:, i] - 1\n        else:\n            test[:, i] = test[:, i]\n\n    samples = test[:, 0:n - 1]\n    labels = test[:, n - 1]\n    idx = np.asarray(list(range(0, m)))  # record the idx of each point\n    #############################\n    # -- choose variable for uni-variate GAN-AD -- #\n    # samples = samples[:, [1,2,3,4]]\n    # samples_a = samples_a[:, [1,2,3,4]]\n    ############################\n    ############################\n    # -- apply PCA dimension reduction for multi-variate GAN-AD -- #\n    from sklearn.decomposition import PCA\n    import DR_discriminator as dr\n    # ALL SENSORS IDX\n    # XS = [0, 1, 5, 6, 7, 8, 16, 17, 18, 25, 26, 27, 28, 33, 34, 35, 36, 37, 38, 39, 40, 41, 44, 45, 46, 47]\n    # X_n = samples[:, XS]\n    # X_a = samples_a[:, XS]\n    # All VARIABLES\n    X_a = 
samples\n    ####################################\n    ###################################\n    # -- the best PC dimension is chosen pc=5 -- #\n    n_components = num_signals\n    pca_a = PCA(n_components, svd_solver='full')\n    pca_a.fit(X_a)\n    pc_a = pca_a.components_\n    # projected values on the principal component\n    T_a = np.matmul(X_a, pc_a.transpose(1, 0))\n\n    samples = T_a\n    # # only for one-dimensional\n    # samples = T_a.reshape([samples.shape[0], ])\n    ###########################################\n    ###########################################\n    num_samples_t = (samples.shape[0] - seq_length) // seq_step\n    aa = np.empty([num_samples_t, seq_length, num_signals])\n    bb = np.empty([num_samples_t, seq_length, 1])\n    bbb = np.empty([num_samples_t, seq_length, 1])\n\n    for j in range(num_samples_t):\n        bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1, 1])\n        bbb[j, :, :] = np.reshape(idx[(j * seq_step):(j * seq_step + seq_length)], [-1, 1])\n        for i in range(num_signals):\n            aa[j, :, i] = samples[(j * seq_step):(j * seq_step + seq_length), i]\n\n    samples = aa\n    labels = bb\n    index = bbb\n\n    return samples, labels, index\n\n\ndef swat_birgan_test(seq_length, seq_step, num_signals, randomize=False):\n    \"\"\" Load and serialise \"\"\"\n    # train = np.load('./data/swat.npy')\n    # print('Loaded swat from .npy')\n    test = np.loadtxt(open('./data/swat_a.csv'), delimiter=',')\n    print('Loaded swat_a from .csv')\n    m, n = test.shape  # m1=449919, n1=52\n    for i in range(n - 1):\n        B = max(test[:, i])\n        if B != 0:\n            test[:, i] /= max(test[:, i])\n            # scale from -1 to 1\n            test[:, i] = 2 * test[:, i] - 1\n        else:\n            test[:, i] = test[:, i]\n\n    samples = test[:, 0:n - 1]\n    labels = test[:, n - 1]\n    # idx = np.asarray(list(range(0, m)))  # record the idx of each point\n    
#############################\n    # # -- choose variable for uni-variate GAN-AD -- #\n    # # samples = samples[:, [1, 8, 18, 28]]\n    ###########################################\n    ###########################################\n    nn = samples.shape[1]\n    num_samples = (samples.shape[0]-seq_length)//seq_step\n    aa = np.empty([num_samples, nn, nn])\n    AA = np.empty([seq_length, nn])\n    bb = np.empty([num_samples, seq_length, 1])\n\n    print('Pre-process testing data...')\n    for j in range(num_samples):\n       # display batch progress\n       model_bigan.display_batch_progression(j, num_samples)\n       bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1,1])\n       for i in range(nn):\n           AA[:, i] = samples[(j * seq_step):(j * seq_step + seq_length), i]\n       aa[j, :, :] = np.cov(AA.T)\n\n    samples = aa\n    labels = bb\n\n    return samples, labels\n\n\ndef wadi(seq_length, seq_step, num_signals, randomize=False):\n    train = np.load('./data/wadi.npy')\n    print('Loaded wadi from .npy')\n    m, n = train.shape  # m=1048571, n=119\n    for i in range(n-1):\n        A = max(train[:, i])\n        if A != 0:\n            train[:, i] /= max(train[:, i])\n            # scale from -1 to 1\n            train[:, i] = 2 * train[:, i] - 1\n        else:\n            train[:, i] = train[:, i]\n\n    samples = train[259200:, 0:n-1]  # normal\n    labels = train[259200:, n-1]\n    #############################\n    samples = samples[:, [0, 3, 6, 17]]\n    # samples = samples[:, 0]\n    ############################\n    # # -- apply PCA dimension reduction for multi-variate GAN-AD -- #\n    # from sklearn.decomposition import PCA\n    # import DR_discriminator as dr\n    # X_n = samples\n    # ####################################\n    # ###################################\n    # # -- the best PC dimension is chosen pc=8 -- #\n    # n_components = num_signals\n    # pca = PCA(n_components, svd_solver='full')\n    # 
pca.fit(X_n)\n    # pc = pca.components_\n    # # projected values on the principal component\n    # T_n = np.matmul(X_n, pc.transpose(1, 0))\n    #\n    # samples = T_n\n    # # # only for one-dimensional\n    # # samples = T_n.reshape([samples.shape[0], ])\n    ###########################################\n    ###########################################\n    seq_length = 10800\n    num_samples = (samples.shape[0] - seq_length) // seq_step\n    print(\"num_samples:\", num_samples)\n    print(\"num_signals:\", num_signals)\n    aa = np.empty([num_samples, seq_length, num_signals])\n    bb = np.empty([num_samples, seq_length, 1])\n\n    for j in range(num_samples):\n        bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1, 1])\n        # aa[j, :, :] = np.reshape(samples[(j * seq_step):(j * seq_step + seq_length)], [-1, 1])\n        for i in range(num_signals):\n            aa[j, :, i] = samples[(j * seq_step):(j * seq_step + seq_length), i]\n\n    samples = aa[:, 0:10800:300, :]\n    labels = bb[:, 0:10800:300, :]\n\n    return samples, labels\n\n\ndef wadi_test(seq_length, seq_step, num_signals, randomize=False):\n    test = np.load('./data/wadi_a.npy')\n    print('Loaded wadi_a from .npy')\n    m, n = test.shape  # m1=172801, n1=119\n\n    for i in range(n - 1):\n        B = max(test[:, i])\n        if B != 0:\n            test[:, i] /= max(test[:, i])\n            # scale from -1 to 1\n            test[:, i] = 2 * test[:, i] - 1\n        else:\n            test[:, i] = test[:, i]\n\n    samples = test[:, 0:n - 1]\n    labels = test[:, n - 1]\n    idx = np.asarray(list(range(0, m)))  # record the idx of each point\n    #############################\n    ############################\n    # -- apply PCA dimension reduction for multi-variate GAN-AD -- #\n    from sklearn.decomposition import PCA\n    import DR_discriminator as dr\n    X_a = samples\n    ####################################\n    ###################################\n    # 
-- the best PC dimension is chosen pc=8 -- #\n    n_components = num_signals\n    pca_a = PCA(n_components, svd_solver='full')\n    pca_a.fit(X_a)\n    pc_a = pca_a.components_\n    # projected values on the principal component\n    T_a = np.matmul(X_a, pc_a.transpose(1, 0))\n\n    samples = T_a\n    # # only for one-dimensional\n    # samples = T_a.reshape([samples.shape[0], ])\n    ###########################################\n    ###########################################\n    num_samples_t = (samples.shape[0] - seq_length) // seq_step\n    aa = np.empty([num_samples_t, seq_length, num_signals])\n    bb = np.empty([num_samples_t, seq_length, 1])\n    bbb = np.empty([num_samples_t, seq_length, 1])\n\n    for j in range(num_samples_t):\n        bb[j, :, :] = np.reshape(labels[(j * 10):(j * seq_step + seq_length)], [-1, 1])\n        bbb[j, :, :] = np.reshape(idx[(j * seq_step):(j * seq_step + seq_length)], [-1, 1])\n        for i in range(num_signals):\n            aa[j, :, i] = samples[(j * seq_step):(j * seq_step + seq_length), i]\n\n    samples = aa\n    labels = bb\n    index = bbb\n\n    return samples, labels, index\n\ndef kdd99(seq_length, seq_step, num_signals):\n    train = np.load('./data/kdd99_train.npy')\n    print('load kdd99_train from .npy')\n    m, n = train.shape  # m=562387, n=35\n    # normalization\n    for i in range(n - 1):\n        # print('i=', i)\n        A = max(train[:, i])\n        # print('A=', A)\n        if A != 0:\n            train[:, i] /= max(train[:, i])\n            # scale from -1 to 1\n            train[:, i] = 2 * train[:, i] - 1\n        else:\n            train[:, i] = train[:, i]\n\n    samples = train[:, 0:n - 1]\n    labels = train[:, n - 1]  # the last colummn is label\n    #############################\n    ############################\n    # -- apply PCA dimension reduction for multi-variate GAN-AD -- #\n    from sklearn.decomposition import PCA\n    X_n = samples\n    ####################################\n    
###################################\n    # -- the best PC dimension is chosen pc=6 -- #\n    n_components = num_signals\n    pca = PCA(n_components, svd_solver='full')\n    pca.fit(X_n)\n    ex_var = pca.explained_variance_ratio_\n    pc = pca.components_\n    # projected values on the principal component\n    T_n = np.matmul(X_n, pc.transpose(1, 0))\n    samples = T_n\n    # # only for one-dimensional\n    # samples = T_n.reshape([samples.shape[0], ])\n    ###########################################\n    ###########################################\n    num_samples = (samples.shape[0] - seq_length) // seq_step\n    aa = np.empty([num_samples, seq_length, num_signals])\n    bb = np.empty([num_samples, seq_length, 1])\n\n    for j in range(num_samples):\n        bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1, 1])\n        for i in range(num_signals):\n            aa[j, :, i] = samples[(j * seq_step):(j * seq_step + seq_length), i]\n\n    samples = aa\n    labels = bb\n\n    return samples, labels\n\ndef kdd99_test(seq_length, seq_step, num_signals):\n    test = np.load('./data/kdd99_test.npy')\n    print('load kdd99_test from .npy')\n\n    m, n = test.shape  # m1=494021, n1=35\n\n    for i in range(n - 1):\n        B = max(test[:, i])\n        if B != 0:\n            test[:, i] /= max(test[:, i])\n            # scale from -1 to 1\n            test[:, i] = 2 * test[:, i] - 1\n        else:\n            test[:, i] = test[:, i]\n\n    samples = test[:, 0:n - 1]\n    labels = test[:, n - 1]\n    idx = np.asarray(list(range(0, m)))  # record the idx of each point\n    #############################\n    ############################\n    # -- apply PCA dimension reduction for multi-variate GAN-AD -- #\n    from sklearn.decomposition import PCA\n    import DR_discriminator as dr\n    X_a = samples\n    ####################################\n    ###################################\n    # -- the best PC dimension is chosen pc=6 -- #\n    
n_components = num_signals\n    pca_a = PCA(n_components, svd_solver='full')\n    pca_a.fit(X_a)\n    pc_a = pca_a.components_\n    # projected values on the principal component\n    T_a = np.matmul(X_a, pc_a.transpose(1, 0))\n    samples = T_a\n    # # only for one-dimensional\n    # samples = T_a.reshape([samples.shape[0], ])\n    ###########################################\n    ###########################################\n    num_samples_t = (samples.shape[0] - seq_length) // seq_step\n    aa = np.empty([num_samples_t, seq_length, num_signals])\n    bb = np.empty([num_samples_t, seq_length, 1])\n    bbb = np.empty([num_samples_t, seq_length, 1])\n\n    for j in range(num_samples_t):\n        bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1, 1])\n        bbb[j, :, :] = np.reshape(idx[(j * seq_step):(j * seq_step + seq_length)], [-1, 1])\n        for i in range(num_signals):\n            aa[j, :, i] = samples[(j * seq_step):(j * seq_step + seq_length), i]\n\n    samples = aa\n    labels = bb\n    index = bbb\n\n    return samples, labels, index\n\n\n# ############################ data pre-processing #################################\n# --- to do with loading --- #\n# --- to do with loading --- #\ndef get_samples_and_labels(settings):\n    \"\"\"\n    Parse settings options to load or generate correct type of data,\n    perform test/train split as necessary, and reform into 'samples' and 'labels'\n    dictionaries.\n    \"\"\"\n    if settings['data_load_from']:\n        data_path = './experiments/data/' + settings['data_load_from'] + '.data.npy'\n        print('Loading data from', data_path)\n        samples, pdf, labels = get_data('load', data_path)\n        train, vali, test = samples['train'], samples['vali'], samples['test']\n        train_labels, vali_labels, test_labels = labels['train'], labels['vali'], labels['test']\n        del samples, labels\n    else:\n        # generate the data\n        data_vars = ['num_samples', 
'num_samples_t','seq_length', 'seq_step', 'num_signals', 'freq_low',\n                'freq_high', 'amplitude_low', 'amplitude_high', 'scale', 'full_mnist']\n        data_settings = dict((k, settings[k]) for k in data_vars if k in settings.keys())\n        samples, pdf, labels = get_data(settings['data'], settings['seq_length'], settings['seq_step'], settings['num_signals'], settings['sub_id'])\n        if 'multivariate_mnist' in settings and settings['multivariate_mnist']:\n            seq_length = samples.shape[1]\n            samples = samples.reshape(-1, int(np.sqrt(seq_length)), int(np.sqrt(seq_length)))\n        if 'normalise' in settings and settings['normalise']: # TODO this is a mess, fix\n            print(settings['normalise'])\n            norm = True\n        else:\n            norm = False\n        if labels is None:\n            train, vali, test = split(samples, [0.6, 0.2, 0.2], normalise=norm)\n            train_labels, vali_labels, test_labels = None, None, None\n        else:\n            train, vali, test, labels_list = split(samples, [0.6, 0.2, 0.2], normalise=norm, labels=labels)\n            train_labels, vali_labels, test_labels = labels_list\n\n    labels = dict()\n    labels['train'], labels['vali'], labels['test'] = train_labels, vali_labels, test_labels\n\n    samples = dict()\n    samples['train'], samples['vali'], samples['test'] = train, vali, test\n\n    # futz around with labels\n    # TODO refactor cause this is messy\n    if 'one_hot' in settings and settings['one_hot'] and not settings['data_load_from']:\n        if len(labels['train'].shape) == 1:\n            # ASSUME labels go from 0 to max_val inclusive, find max-val\n            max_val = int(np.max([labels['train'].max(), labels['test'].max(), labels['vali'].max()]))\n            # now we have max_val + 1 dimensions\n            print('Setting cond_dim to', max_val + 1, 'from', settings['cond_dim'])\n            settings['cond_dim'] = max_val + 1\n            print('Setting 
max_val to 1 from', settings['max_val'])\n            settings['max_val'] = 1\n\n            labels_oh = dict()\n            for (k, v) in labels.items():\n                A = np.zeros(shape=(len(v), settings['cond_dim']))\n                A[np.arange(len(v)), (v).astype(int)] = 1\n                labels_oh[k] = A\n            labels = labels_oh\n        else:\n            assert settings['max_val'] == 1\n            # this is already one-hot!\n\n    if 'predict_labels' in settings and settings['predict_labels']:\n        samples, labels = data_utils.make_predict_labels(samples, labels)\n        print('Setting cond_dim to 0 from', settings['cond_dim'])\n        settings['cond_dim'] = 0\n\n    # update the settings dictionary to update erroneous settings\n    # (mostly about the sequence length etc. - it gets set by the data!)\n    settings['seq_length'] = samples['train'].shape[1]\n    settings['num_samples'] = samples['train'].shape[0] + samples['vali'].shape[0] + samples['test'].shape[0]\n    settings['num_signals'] = samples['train'].shape[2]\n\n    return samples, pdf, labels\n\n\ndef get_data(data_type, seq_length, seq_step, num_signals, sub_id, eval_single, eval_an, data_options=None):\n    \"\"\"\n    Helper/wrapper function to get the requested data.\n    \"\"\"\n    print('data_type')\n    labels = None\n    index = None\n    if data_type == 'load':\n        data_dict = np.load(data_options).item()\n        samples = data_dict['samples']\n        pdf = data_dict['pdf']\n        labels = data_dict['labels']\n    elif data_type == 'swat':\n        samples, labels = swat(seq_length, seq_step, num_signals)\n    elif data_type == 'swat_test':\n        samples, labels, index = swat_test(seq_length, seq_step, num_signals)\n    elif data_type == 'kdd99':\n        samples, labels = kdd99(seq_length, seq_step, num_signals)\n    elif data_type == 'kdd99_test':\n        samples, labels, index = kdd99_test(seq_length, seq_step, num_signals)\n    elif data_type == 
'wadi':\n        samples, labels = wadi(seq_length, seq_step, num_signals)\n    elif data_type == 'wadi_test':\n        samples, labels, index = wadi_test(seq_length, seq_step, num_signals)\n    else:\n        raise ValueError(data_type)\n    print('Generated/loaded', len(samples), 'samples from data-type', data_type)\n    return samples, labels, index\n\n\ndef get_batch(samples, batch_size, batch_idx, labels=None):\n    start_pos = batch_idx * batch_size\n    end_pos = start_pos + batch_size\n    if labels is None:\n        return samples[start_pos:end_pos], None\n    else:\n        if type(labels) == tuple: # two sets of labels\n            assert len(labels) == 2\n            return samples[start_pos:end_pos], labels[0][start_pos:end_pos], labels[1][start_pos:end_pos]\n        else:\n            assert type(labels) == np.ndarray\n            return samples[start_pos:end_pos], labels[start_pos:end_pos]\n\n\n\ndef split(samples, proportions, normalise=False, scale=False, labels=None, random_seed=None):\n    \"\"\"\n    Return train/validation/test split.\n    \"\"\"\n    if random_seed != None:\n        random.seed(random_seed)\n        np.random.seed(random_seed)\n    assert np.sum(proportions) == 1\n    n_total = samples.shape[0]\n    n_train = ceil(n_total * proportions[0])\n    n_test = ceil(n_total * proportions[2])\n    n_vali = n_total - (n_train + n_test)\n    # permutation to shuffle the samples\n    shuff = np.random.permutation(n_total)\n    train_indices = shuff[:n_train]\n    vali_indices = shuff[n_train:(n_train + n_vali)]\n    test_indices = shuff[(n_train + n_vali):]\n    # TODO when we want to scale we can just return the indices\n    assert len(set(train_indices).intersection(vali_indices)) == 0\n    assert len(set(train_indices).intersection(test_indices)) == 0\n    assert len(set(vali_indices).intersection(test_indices)) == 0\n    # split up the samples\n    train = samples[train_indices]\n    vali = samples[vali_indices]\n    test = 
samples[test_indices]\n    # apply the same normalisation scheme to all parts of the split\n    if normalise:\n        if scale: raise ValueError(normalise, scale)  # mutually exclusive\n        train, vali, test = normalise_data(train, vali, test)\n    elif scale:\n        train, vali, test = scale_data(train, vali, test)\n    if labels is None:\n        return train, vali, test\n    else:\n        print('Splitting labels...')\n        if type(labels) == np.ndarray:\n            train_labels = labels[train_indices]\n            vali_labels = labels[vali_indices]\n            test_labels = labels[test_indices]\n            labels_split = [train_labels, vali_labels, test_labels]\n        elif type(labels) == dict:\n            # more than one set of labels!  (weird case)\n            labels_split = dict()\n            for (label_name, label_set) in labels.items():\n                train_labels = label_set[train_indices]\n                vali_labels = label_set[vali_indices]\n                test_labels = label_set[test_indices]\n                labels_split[label_name] = [train_labels, vali_labels, test_labels]\n        else:\n            raise ValueError(type(labels))\n        return train, vali, test, labels_split\n"
  },
  {
    "path": "differential_privacy/dp_sgd/dp_optimizer/dp_optimizer.py",
    "content": "# (originally from https://github.com/tensorflow/models/tree/master/research/differential_privacy,\n# possibly with some small edits by @corcra)\n\n# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\n\"\"\"Differentially private optimizers.\n\"\"\"\nfrom __future__ import division\n\nimport tensorflow as tf\n\nfrom differential_privacy.dp_sgd.dp_optimizer import utils\n#from differential_privacy.dp_sgd.per_example_gradients import per_example_gradients\n\nimport pdb\n\nclass DPGradientDescentOptimizer(tf.train.GradientDescentOptimizer):\n  \"\"\"Differentially private gradient descent optimizer.\n  \"\"\"\n\n  def __init__(self, learning_rate, eps_delta, sanitizer,\n               sigma=None, use_locking=False, name=\"DPGradientDescent\",\n               batches_per_lot=1):\n    \"\"\"Construct a differentially private gradient descent optimizer.\n\n    The optimizer uses fixed privacy budget for each batch of training.\n\n    Args:\n      learning_rate: for GradientDescentOptimizer.\n      eps_delta: EpsDelta pair for each epoch.\n      sanitizer: for sanitizing the graident.\n      sigma: noise sigma. 
If None, use eps_delta pair to compute sigma;\n        otherwise use supplied sigma directly.\n      use_locking: use locking.\n      name: name for the object.\n      batches_per_lot: Number of batches in a lot.\n    \"\"\"\n\n    super(DPGradientDescentOptimizer, self).__init__(learning_rate,\n                                                     use_locking, name)\n    # Also, if needed, define the gradient accumulators\n    self._batches_per_lot = batches_per_lot\n    self._grad_accum_dict = {}\n    if batches_per_lot > 1:\n      self._batch_count = tf.Variable(1, dtype=tf.int32, trainable=False,\n                                      name=\"batch_count\")\n      var_list = tf.trainable_variables()\n      with tf.variable_scope(\"grad_acc_for\"):\n        for var in var_list:\n          v_grad_accum = tf.Variable(tf.zeros_like(var),\n                                     trainable=False,\n                                     name=utils.GetTensorOpName(var))\n          self._grad_accum_dict[var.name] = v_grad_accum\n\n    self._eps_delta = eps_delta\n    self._sanitizer = sanitizer\n    self._sigma = sigma\n\n  def compute_sanitized_gradients(self, loss, var_list=None,\n                                  add_noise=True):\n    \"\"\"Compute the sanitized gradients.\n\n    Args:\n      loss: the loss tensor.\n      var_list: the optional variables.\n      add_noise: if true, then add noise. 
Always clip.\n    Returns:\n      a pair of (list of sanitized gradients) and privacy spending accumulation\n      operations.\n    Raises:\n      TypeError: if var_list contains non-variable.\n    \"\"\"\n\n    self._assert_valid_dtypes([loss])\n\n    xs = [tf.convert_to_tensor(x) for x in var_list]\n    # TODO check this change\n    loss_list = tf.unstack(loss, axis=0)\n    px_grads_byexample = [tf.gradients(l, xs) for l in loss_list]\n    px_grads = [[x[v] for x in px_grads_byexample] for v in range(len(xs))]\n    #px_grads = tf.gradients(loss, xs)\n    # add a dummy 0th dimension to reflect the fact that we have a batch size of 1...\n  #  px_grads = [tf.expand_dims(x, 0) for x in px_grads]\n#    px_grads = per_example_gradients.PerExampleGradients(loss, xs)\n    sanitized_grads = []\n    for px_grad, v in zip(px_grads, var_list):\n      tensor_name = utils.GetTensorOpName(v)\n      sanitized_grad = self._sanitizer.sanitize(\n          px_grad, self._eps_delta, sigma=self._sigma,\n          tensor_name=tensor_name, add_noise=add_noise,\n          num_examples=self._batches_per_lot * tf.slice(\n              tf.shape(px_grad), [0], [1]))\n      sanitized_grads.append(sanitized_grad)\n\n    return sanitized_grads\n\n  def minimize(self, loss, global_step=None, var_list=None,\n               name=None):\n    \"\"\"Minimize using sanitized gradients.\n\n    This gets a var_list which is the list of trainable variables.\n    For each var in var_list, we defined a grad_accumulator variable\n    during init. When batches_per_lot > 1, we accumulate the gradient\n    update in those. At the end of each lot, we apply the update back to\n    the variable. This has the effect that for each lot we compute\n    gradients at the point at the beginning of the lot, and then apply one\n    update at the end of the lot. 
In other words, semantically, we are doing\n    SGD with one lot being the equivalent of one usual batch of size\n    batch_size * batches_per_lot.\n    This allows us to simulate larger batches than our memory size would permit.\n\n    The lr and the num_steps are in the lot world.\n\n    Args:\n      loss: the loss tensor.\n      global_step: the optional global step.\n      var_list: the optional variables.\n      name: the optional name.\n    Returns:\n      the operation that runs one step of DP gradient descent.\n    \"\"\"\n\n    # First validate the var_list\n\n    if var_list is None:\n      var_list = tf.trainable_variables()\n    for var in var_list:\n      if not isinstance(var, tf.Variable):\n        raise TypeError(\"Argument is not a variable.Variable: %s\" % var)\n\n    # Modification: apply gradient once every batches_per_lot many steps.\n    # This may lead to smaller error\n\n    if self._batches_per_lot == 1:\n      sanitized_grads = self.compute_sanitized_gradients(\n          loss, var_list=var_list)\n\n      grads_and_vars = list(zip(sanitized_grads, var_list))\n      self._assert_valid_dtypes([v for g, v in grads_and_vars if g is not None])\n\n      apply_grads = self.apply_gradients(grads_and_vars,\n                                         global_step=global_step, name=name)\n      return apply_grads\n\n    # Condition for deciding whether to accumulate the gradient\n    # or actually apply it.\n    # we use a private self_batch_count to keep track of number of batches.\n    # global step will count number of lots processed.\n\n    update_cond = tf.equal(tf.constant(0),\n                           tf.mod(self._batch_count,\n                                  tf.constant(self._batches_per_lot)))\n\n    # Things to do for batches other than last of the lot.\n    # Add non-noisy clipped grads to shadow variables.\n\n    def non_last_in_lot_op(loss, var_list):\n      \"\"\"Ops to do for a typical batch.\n\n      For a batch that is not the last 
one in the lot, we simply compute the\n      sanitized gradients and apply them to the grad_acc variables.\n\n      Args:\n        loss: loss function tensor\n        var_list: list of variables\n      Returns:\n        A tensorflow op to do the updates to the gradient accumulators\n      \"\"\"\n      sanitized_grads = self.compute_sanitized_gradients(\n          loss, var_list=var_list, add_noise=False)\n\n      update_ops_list = []\n      for var, grad in zip(var_list, sanitized_grads):\n        grad_acc_v = self._grad_accum_dict[var.name]\n        update_ops_list.append(grad_acc_v.assign_add(grad))\n      update_ops_list.append(self._batch_count.assign_add(1))\n      return tf.group(*update_ops_list)\n\n    # Things to do for last batch of a lot.\n    # Add noisy clipped grads to accumulator.\n    # Apply accumulated grads to vars.\n\n    def last_in_lot_op(loss, var_list, global_step):\n      \"\"\"Ops to do for last batch in a lot.\n\n      For the last batch in the lot, we first add the sanitized gradients to\n      the gradient acc variables, and then apply these\n      values over to the original variables (via an apply gradient)\n\n      Args:\n        loss: loss function tensor\n        var_list: list of variables\n        global_step: optional global step to be passed to apply_gradients\n      Returns:\n        A tensorflow op to push updates from shadow vars to real vars.\n      \"\"\"\n\n      # We add noise in the last lot. 
This is why we need this code snippet\n      # that looks almost identical to the non_last_op case here.\n      sanitized_grads = self.compute_sanitized_gradients(\n          loss, var_list=var_list, add_noise=True)\n\n      normalized_grads = []\n      for var, grad in zip(var_list, sanitized_grads):\n        grad_acc_v = self._grad_accum_dict[var.name]\n        # To handle the lr difference per lot vs per batch, we divide the\n        # update by number of batches per lot.\n        normalized_grad = tf.div(grad_acc_v.assign_add(grad),\n                                 tf.to_float(self._batches_per_lot))\n\n        normalized_grads.append(normalized_grad)\n\n      with tf.control_dependencies(normalized_grads):\n        grads_and_vars = list(zip(normalized_grads, var_list))\n        self._assert_valid_dtypes(\n            [v for g, v in grads_and_vars if g is not None])\n        apply_san_grads = self.apply_gradients(grads_and_vars,\n                                               global_step=global_step,\n                                               name=\"apply_grads\")\n\n      # Now reset the accumulators to zero\n      resets_list = []\n      with tf.control_dependencies([apply_san_grads]):\n        for _, acc in self._grad_accum_dict.items():\n          reset = tf.assign(acc, tf.zeros_like(acc))\n          resets_list.append(reset)\n      resets_list.append(self._batch_count.assign_add(1))\n\n      last_step_update = tf.group(*([apply_san_grads] + resets_list))\n      return last_step_update\n    # pylint: disable=g-long-lambda\n    update_op = tf.cond(update_cond,\n                        lambda: last_in_lot_op(\n                            loss, var_list,\n                            global_step),\n                        lambda: non_last_in_lot_op(\n                            loss, var_list))\n    return tf.group(update_op)\n"
  },
  {
    "path": "differential_privacy/dp_sgd/dp_optimizer/sanitizer.py",
    "content": "# (originally from https://github.com/tensorflow/models/tree/master/research/differential_privacy,\n# possibly with some small edits by @corcra)\n# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\n\"\"\"Defines Sanitizer class for sanitizing tensors.\n\nA sanitizer first limits the sensitivity of a tensor and then adds noise\nto the tensor. The parameters are determined by the privacy_spending and the\nother parameters. It also uses an accountant to keep track of the privacy\nspending.\n\"\"\"\nfrom __future__ import division\n\nimport collections\n\nimport tensorflow as tf\n\nfrom differential_privacy.dp_sgd.dp_optimizer import utils\n\nimport pdb\n\nClipOption = collections.namedtuple(\"ClipOption\",\n                                    [\"l2norm_bound\", \"clip\"])\n\n\nclass AmortizedGaussianSanitizer(object):\n  \"\"\"Sanitizer with Gaussian noise and amoritzed privacy spending accounting.\n\n  This sanitizes a tensor by first clipping the tensor, summing the tensor\n  and then adding appropriate amount of noise. It also uses an amortized\n  accountant to keep track of privacy spending.\n  \"\"\"\n\n  def __init__(self, accountant, default_option):\n    \"\"\"Construct an AmortizedGaussianSanitizer.\n\n    Args:\n      accountant: the privacy accountant. 
Expect an amortized one.\n      default_option: the default ClipOptoin.\n    \"\"\"\n\n    self._accountant = accountant\n    self._default_option = default_option\n    self._options = {}\n\n  def set_option(self, tensor_name, option):\n    \"\"\"Set options for an individual tensor.\n\n    Args:\n      tensor_name: the name of the tensor.\n      option: clip option.\n    \"\"\"\n\n    self._options[tensor_name] = option\n\n  def sanitize(self, x, eps_delta, sigma=None,\n               option=ClipOption(None, None), tensor_name=None,\n               num_examples=None, add_noise=True):\n    \"\"\"Sanitize the given tensor.\n\n    This santize a given tensor by first applying l2 norm clipping and then\n    adding Gaussian noise. It calls the privacy accountant for updating the\n    privacy spending.\n\n    Args:\n      x: the tensor to sanitize.\n      eps_delta: a pair of eps, delta for (eps,delta)-DP. Use it to\n        compute sigma if sigma is None.\n      sigma: if sigma is not None, use sigma.\n      option: a ClipOption which, if supplied, used for\n        clipping and adding noise.\n      tensor_name: the name of the tensor.\n      num_examples: if None, use the number of \"rows\" of x.\n      add_noise: if True, then add noise, else just clip.\n    Returns:\n      a pair of sanitized tensor and the operation to accumulate privacy\n      spending.\n    \"\"\"\n    if sigma is None:\n      # pylint: disable=unpacking-non-sequence\n      eps, delta = eps_delta\n      with tf.control_dependencies(\n          [tf.Assert(tf.greater(eps, 0),\n                     [\"eps needs to be greater than 0\"]),\n           tf.Assert(tf.greater(delta, 0),\n                     [\"delta needs to be greater than 0\"])]):\n        # The following formula is taken from\n        #   Dwork and Roth, The Algorithmic Foundations of Differential\n        #   Privacy, Appendix A.\n        #   http://www.cis.upenn.edu/~aaroth/Papers/privacybook.pdf\n        sigma = tf.sqrt(2.0 * 
tf.log(1.25 / delta)) / eps\n\n    l2norm_bound, clip = option\n    if l2norm_bound is None:\n      l2norm_bound, clip = self._default_option\n      if ((tensor_name is not None) and\n          (tensor_name in self._options)):\n        l2norm_bound, clip = self._options[tensor_name]\n    if clip:\n      x = utils.BatchClipByL2norm(x, l2norm_bound)\n\n    if add_noise:\n      if num_examples is None:\n        num_examples = tf.slice(tf.shape(x), [0], [1])\n      privacy_accum_op = self._accountant.accumulate_privacy_spending(\n          eps_delta, sigma, num_examples)\n      with tf.control_dependencies([privacy_accum_op]):\n        saned_x = utils.AddGaussianNoise(tf.reduce_sum(x, 0),\n                                         sigma * l2norm_bound)\n    else:\n      saned_x = tf.reduce_sum(x, 0)\n    return saned_x\n"
  },
  {
    "path": "differential_privacy/dp_sgd/dp_optimizer/utils.py",
    "content": "# (originally from https://github.com/tensorflow/models/tree/master/research/differential_privacy,\n# possibly with some small edits by @corcra)\n\n# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\n\"\"\"Utils for building and training NN models.\n\"\"\"\nfrom __future__ import division\n\nimport math\n\nimport numpy\nimport tensorflow as tf\n\n\nclass LayerParameters(object):\n  \"\"\"class that defines a non-conv layer.\"\"\"\n  def __init__(self):\n    self.name = \"\"\n    self.num_units = 0\n    self._with_bias = False\n    self.relu = False\n    self.gradient_l2norm_bound = 0.0\n    self.bias_gradient_l2norm_bound = 0.0\n    self.trainable = True\n    self.weight_decay = 0.0\n\n\nclass ConvParameters(object):\n  \"\"\"class that defines a conv layer.\"\"\"\n  def __init__(self):\n    self.patch_size = 5\n    self.stride = 1\n    self.in_channels = 1\n    self.out_channels = 0\n    self.with_bias = True\n    self.relu = True\n    self.max_pool = True\n    self.max_pool_size = 2\n    self.max_pool_stride = 2\n    self.trainable = False\n    self.in_size = 28\n    self.name = \"\"\n    self.num_outputs = 0\n    self.bias_stddev = 0.1\n\n\n# Parameters for a layered neural network.\nclass NetworkParameters(object):\n  \"\"\"class that define the overall model structure.\"\"\"\n  def __init__(self):\n  
  self.input_size = 0\n    self.projection_type = 'NONE'  # NONE, RANDOM, PCA\n    self.projection_dimensions = 0\n    self.default_gradient_l2norm_bound = 0.0\n    self.layer_parameters = []  # List of LayerParameters\n    self.conv_parameters = []  # List of ConvParameters\n\n\ndef GetTensorOpName(x):\n  \"\"\"Get the name of the op that created a tensor.\n\n  Useful for naming related tensors, as ':' in name field of op is not permitted\n\n  Args:\n    x: the input tensor.\n  Returns:\n    the name of the op.\n  \"\"\"\n\n  t = x.name.rsplit(\":\", 1)\n  if len(t) == 1:\n    return x.name\n  else:\n    return t[0]\n\n\ndef BuildNetwork(inputs, network_parameters):\n  \"\"\"Build a network using the given parameters.\n\n  Args:\n    inputs: a Tensor of floats containing the input data.\n    network_parameters: NetworkParameters object\n      that describes the parameters for the network.\n  Returns:\n    output, training_parameters: where the outputs (a tensor) is the output\n      of the network, and training_parameters (a dictionary that maps the\n      name of each variable to a dictionary of parameters) is the parameters\n      used during training.\n  \"\"\"\n\n  training_parameters = {}\n  num_inputs = network_parameters.input_size\n  outputs = inputs\n  projection = None\n\n  # First apply convolutions, if needed\n  for conv_param in network_parameters.conv_parameters:\n    outputs = tf.reshape(\n        outputs,\n        [-1, conv_param.in_size, conv_param.in_size,\n         conv_param.in_channels])\n    conv_weights_name = \"%s_conv_weight\" % (conv_param.name)\n    conv_bias_name = \"%s_conv_bias\" % (conv_param.name)\n    conv_std_dev = 1.0 / (conv_param.patch_size\n                          * math.sqrt(conv_param.in_channels))\n    conv_weights = tf.Variable(\n        tf.truncated_normal([conv_param.patch_size,\n                             conv_param.patch_size,\n                             conv_param.in_channels,\n                             
conv_param.out_channels],\n                            stddev=conv_std_dev),\n        trainable=conv_param.trainable,\n        name=conv_weights_name)\n    conv_bias = tf.Variable(\n        tf.truncated_normal([conv_param.out_channels],\n                            stddev=conv_param.bias_stddev),\n        trainable=conv_param.trainable,\n        name=conv_bias_name)\n    training_parameters[conv_weights_name] = {}\n    training_parameters[conv_bias_name] = {}\n    conv = tf.nn.conv2d(outputs, conv_weights,\n                        strides=[1, conv_param.stride,\n                                 conv_param.stride, 1],\n                        padding=\"SAME\")\n    relud = tf.nn.relu(conv + conv_bias)\n    mpd = tf.nn.max_pool(relud, ksize=[1,\n                                       conv_param.max_pool_size,\n                                       conv_param.max_pool_size, 1],\n                         strides=[1, conv_param.max_pool_stride,\n                                  conv_param.max_pool_stride, 1],\n                         padding=\"SAME\")\n    outputs = mpd\n    num_inputs = conv_param.num_outputs\n    # this should equal\n    # in_size * in_size * out_channels / (stride * max_pool_stride)\n\n  # once all the convs are done, reshape to make it flat\n  outputs = tf.reshape(outputs, [-1, num_inputs])\n\n  # Now project, if needed\n  if network_parameters.projection_type != \"NONE\":\n    projection = tf.Variable(tf.truncated_normal(\n        [num_inputs, network_parameters.projection_dimensions],\n        stddev=1.0 / math.sqrt(num_inputs)), trainable=False, name=\"projection\")\n    num_inputs = network_parameters.projection_dimensions\n    outputs = tf.matmul(outputs, projection)\n\n  # Now apply any other layers\n\n  for layer_parameters in network_parameters.layer_parameters:\n    num_units = layer_parameters.num_units\n    hidden_weights_name = \"%s_weight\" % (layer_parameters.name)\n    hidden_weights = tf.Variable(\n        
tf.truncated_normal([num_inputs, num_units],\n                            stddev=1.0 / math.sqrt(num_inputs)),\n        name=hidden_weights_name, trainable=layer_parameters.trainable)\n    training_parameters[hidden_weights_name] = {}\n    if layer_parameters.gradient_l2norm_bound:\n      training_parameters[hidden_weights_name][\"gradient_l2norm_bound\"] = (\n          layer_parameters.gradient_l2norm_bound)\n    if layer_parameters.weight_decay:\n      training_parameters[hidden_weights_name][\"weight_decay\"] = (\n          layer_parameters.weight_decay)\n\n    outputs = tf.matmul(outputs, hidden_weights)\n    if layer_parameters.with_bias:\n      hidden_biases_name = \"%s_bias\" % (layer_parameters.name)\n      hidden_biases = tf.Variable(tf.zeros([num_units]),\n                                  name=hidden_biases_name)\n      training_parameters[hidden_biases_name] = {}\n      if layer_parameters.bias_gradient_l2norm_bound:\n        training_parameters[hidden_biases_name][\n            \"bias_gradient_l2norm_bound\"] = (\n                layer_parameters.bias_gradient_l2norm_bound)\n\n      outputs += hidden_biases\n    if layer_parameters.relu:\n      outputs = tf.nn.relu(outputs)\n    # num_inputs for the next layer is num_units in the current layer.\n    num_inputs = num_units\n\n  return outputs, projection, training_parameters\n\n\ndef VaryRate(start, end, saturate_epochs, epoch):\n  \"\"\"Compute a linearly varying number.\n\n  Decrease linearly from start to end until epoch saturate_epochs.\n\n  Args:\n    start: the initial number.\n    end: the end number.\n    saturate_epochs: after this we do not reduce the number; if less than\n      or equal to zero, just return start.\n    epoch: the current learning epoch.\n  Returns:\n    the caculated number.\n  \"\"\"\n  if saturate_epochs <= 0:\n    return start\n\n  step = (start - end) / (saturate_epochs - 1)\n  if epoch < saturate_epochs:\n    return start - step * epoch\n  else:\n    return end\n\n\ndef 
BatchClipByL2norm(t, upper_bound, name=None):\n  \"\"\"Clip an array of tensors by L2 norm.\n\n  Shrink each dimension-0 slice of tensor (for matrix it is each row) such\n  that the l2 norm is at most upper_bound. Here we clip each row as it\n  corresponds to each example in the batch.\n\n  Args:\n    t: the input tensor.\n    upper_bound: the upperbound of the L2 norm.\n    name: optional name.\n  Returns:\n    the clipped tensor.\n  \"\"\"\n\n  assert upper_bound > 0\n  with tf.name_scope(values=[t, upper_bound], name=name,\n                     default_name=\"batch_clip_by_l2norm\") as name:\n    saved_shape = tf.shape(t)\n    batch_size = tf.slice(saved_shape, [0], [1])\n    t2 = tf.reshape(t, tf.concat(axis=0, values=[batch_size, [-1]]))\n    upper_bound_inv = tf.fill(tf.slice(saved_shape, [0], [1]),\n                              tf.constant(1.0/upper_bound))\n    # Add a small number to avoid divide by 0\n    l2norm_inv = tf.rsqrt(tf.reduce_sum(t2 * t2, [1]) + 0.000001)\n    scale = tf.minimum(l2norm_inv, upper_bound_inv) * upper_bound\n    clipped_t = tf.matmul(tf.diag(scale), t2)\n    clipped_t = tf.reshape(clipped_t, saved_shape, name=name)\n  return clipped_t\n\n\ndef SoftThreshold(t, threshold_ratio, name=None):\n  \"\"\"Soft-threshold a tensor by the mean value.\n\n  Softthreshold each dimension-0 vector (for matrix it is each column) by\n  the mean of absolute value multiplied by the threshold_ratio factor. 
Here\n  we soft threshold each column as it corresponds to each unit in a layer.\n\n  Args:\n    t: the input tensor.\n    threshold_ratio: the threshold ratio.\n    name: the optional name for the returned tensor.\n  Returns:\n    the thresholded tensor, where each entry is soft-thresholded by\n    threshold_ratio times the mean of the aboslute value of each column.\n  \"\"\"\n\n  assert threshold_ratio >= 0\n  with tf.name_scope(values=[t, threshold_ratio], name=name,\n                     default_name=\"soft_thresholding\") as name:\n    saved_shape = tf.shape(t)\n    t2 = tf.reshape(t, tf.concat(axis=0, values=[tf.slice(saved_shape, [0], [1]), -1]))\n    t_abs = tf.abs(t2)\n    t_x = tf.sign(t2) * tf.nn.relu(t_abs -\n                                   (tf.reduce_mean(t_abs, [0],\n                                                   keep_dims=True) *\n                                    threshold_ratio))\n    return tf.reshape(t_x, saved_shape, name=name)\n\n\ndef AddGaussianNoise(t, sigma, name=None):\n  \"\"\"Add i.i.d. Gaussian noise (0, sigma^2) to every entry of t.\n\n  Args:\n    t: the input tensor.\n    sigma: the stddev of the Gaussian noise.\n    name: optional name.\n  Returns:\n    the noisy tensor.\n  \"\"\"\n\n  with tf.name_scope(values=[t, sigma], name=name,\n                     default_name=\"add_gaussian_noise\") as name:\n    noisy_t = t + tf.random_normal(tf.shape(t), stddev=sigma)\n  return noisy_t\n\n\ndef GenerateBinomialTable(m):\n  \"\"\"Generate binomial table.\n\n  Args:\n    m: the size of the table.\n  Returns:\n    A two dimensional array T where T[i][j] = (i choose j),\n    for 0<= i, j <=m.\n  \"\"\"\n\n  table = numpy.zeros((m + 1, m + 1), dtype=numpy.float64)\n  for i in range(m + 1):\n    table[i, 0] = 1\n  for i in range(1, m + 1):\n    for j in range(1, m + 1):\n      v = table[i - 1, j] + table[i - 1, j -1]\n      assert not math.isnan(v) and not math.isinf(v)\n      table[i, j] = v\n  return tf.convert_to_tensor(table)\n"
  },
  {
    "path": "differential_privacy/privacy_accountant/tf/accountant.py",
    "content": "# (originally from https://github.com/tensorflow/models/tree/master/research/differential_privacy,\n# possibly with some small edits by @corcra)\n\n# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\n\"\"\"Defines Accountant class for keeping track of privacy spending.\n\nA privacy accountant keeps track of privacy spendings. It has methods\naccumulate_privacy_spending and get_privacy_spent. Here we only define\nAmortizedAccountant which tracks the privacy spending in the amortized\nway. It uses privacy amplication via sampling to compute the privacy\nspending for each batch and strong composition (specialized for Gaussian\nnoise) for accumulate the privacy spending.\n\"\"\"\nfrom __future__ import division\n\nimport abc\nimport collections\nimport math\nimport sys\n\nimport numpy\nimport tensorflow as tf\n\nfrom differential_privacy.dp_sgd.dp_optimizer import utils\n\nEpsDelta = collections.namedtuple(\"EpsDelta\", [\"spent_eps\", \"spent_delta\"])\n\nimport pdb\n\n# TODO(liqzhang) To ensure the same API for AmortizedAccountant and\n# MomentsAccountant, we pass the union of arguments to both, so we\n# have unused_sigma for AmortizedAccountant and unused_eps_delta for\n# MomentsAccountant. Consider to revise the API to avoid the unused\n# arguments.  
It would be good to use @abc.abstractmethod, etc, to\n# define the common interface as a base class.\nclass AmortizedAccountant(object):\n  \"\"\"Keep track of privacy spending in an amortized way.\n\n  AmortizedAccountant accumulates the privacy spending by assuming\n  all the examples are processed uniformly at random so the spending is\n  amortized among all the examples. And we assume that we use Gaussian noise\n  so the accumulation is on eps^2 and delta, using advanced composition.\n  \"\"\"\n\n  def __init__(self, total_examples):\n    \"\"\"Initialization. Currently only support amortized tracking.\n\n    Args:\n      total_examples: total number of examples.\n    \"\"\"\n\n    assert total_examples > 0\n    self._total_examples = total_examples\n    self._eps_squared_sum = tf.Variable(tf.zeros([1]), trainable=False,\n                                        name=\"eps_squared_sum\")\n    self._delta_sum = tf.Variable(tf.zeros([1]), trainable=False,\n                                  name=\"delta_sum\")\n\n  def accumulate_privacy_spending(self, eps_delta, unused_sigma,\n                                  num_examples):\n    \"\"\"Accumulate the privacy spending.\n\n    Currently only support approximate privacy. Here we assume we use Gaussian\n    noise on randomly sampled batch so we get better composition: 1. the per\n    batch privacy is computed using privacy amplication via sampling bound;\n    2. the composition is done using the composition with Gaussian noise.\n    TODO(liqzhang) Add a link to a document that describes the bounds used.\n\n    Args:\n      eps_delta: EpsDelta pair which can be tensors.\n      unused_sigma: the noise sigma. 
Unused for this accountant.\n      num_examples: the number of examples involved.\n    Returns:\n      a TensorFlow operation for updating the privacy spending.\n    \"\"\"\n\n    eps, delta = eps_delta\n    with tf.control_dependencies(\n        [tf.Assert(tf.greater(delta, 0),\n                   [\"delta needs to be greater than 0\"])]):\n      amortize_ratio = (tf.cast(num_examples, tf.float32) * 1.0 /\n                        self._total_examples)\n      # Use privacy amplification via sampling bound.\n      # See Lemma 2.2 in http://arxiv.org/pdf/1405.7085v2.pdf\n      # TODO(liqzhang) Add a link to a document with formal statement\n      # and proof.\n      amortize_eps = tf.reshape(tf.log(1.0 + amortize_ratio * (\n          tf.exp(eps) - 1.0)), [1])\n      amortize_delta = tf.reshape(amortize_ratio * delta, [1])\n      return tf.group(*[tf.assign_add(self._eps_squared_sum,\n                                      tf.square(amortize_eps)),\n                        tf.assign_add(self._delta_sum, amortize_delta)])\n\n  def get_privacy_spent(self, sess, target_eps=None):\n    \"\"\"Report the spending so far.\n\n    Args:\n      sess: the session to run the tensor.\n      target_eps: the target epsilon. Unused.\n    Returns:\n      the list containing a single EpsDelta, with values as Python floats (as\n      opposed to numpy.float64). 
This is to be consistent with\n      MomentAccountant which can return a list of (eps, delta) pair.\n    \"\"\"\n\n    # pylint: disable=unused-argument\n    unused_target_eps = target_eps\n    eps_squared_sum, delta_sum = sess.run([self._eps_squared_sum,\n                                           self._delta_sum])\n    return [EpsDelta(math.sqrt(eps_squared_sum), float(delta_sum))]\n\n\nclass MomentsAccountant(object):\n  \"\"\"Privacy accountant which keeps track of moments of privacy loss.\n\n  Note: The constructor of this class creates tf.Variables that must\n  be initialized with tf.global_variables_initializer() or similar calls.\n\n  MomentsAccountant accumulates the high moments of the privacy loss. It\n  requires a method for computing differenital moments of the noise (See\n  below for the definition). So every specific accountant should subclass\n  this class by implementing _differential_moments method.\n\n  Denote by X_i the random variable of privacy loss at the i-th step.\n  Consider two databases D, D' which differ by one item. X_i takes value\n  log Pr[M(D')==x]/Pr[M(D)==x] with probability Pr[M(D)==x].\n  In MomentsAccountant, we keep track of y_i(L) = log E[exp(L X_i)] for some\n  large enough L. To compute the final privacy spending,  we apply Chernoff\n  bound (assuming the random noise added at each step is independent) to\n  bound the total privacy loss Z = sum X_i as follows:\n    Pr[Z > e] = Pr[exp(L Z) > exp(L e)]\n              < E[exp(L Z)] / exp(L e)\n              = Prod_i E[exp(L X_i)] / exp(L e)\n              = exp(sum_i log E[exp(L X_i)]) / exp(L e)\n              = exp(sum_i y_i(L) - L e)\n  Hence the mechanism is (e, d)-differentially private for\n    d =  exp(sum_i y_i(L) - L e).\n  We require d < 1, i.e. e > sum_i y_i(L) / L. 
We maintain y_i(L) for several\n  L to compute the best d for any give e (normally should be the lowest L\n  such that 2 * sum_i y_i(L) / L < e.\n\n  We further assume that at each step, the mechanism operates on a random\n  sample with sampling probability q = batch_size / total_examples. Then\n    E[exp(L X)] = E[(Pr[M(D)==x / Pr[M(D')==x])^L]\n  By distinguishing two cases of whether D < D' or D' < D, we have\n  that\n    E[exp(L X)] <= max (I1, I2)\n  where\n    I1 = (1-q) E ((1-q) + q P(X+1) / P(X))^L + q E ((1-q) + q P(X) / P(X-1))^L\n    I2 = E (P(X) / ((1-q) + q P(X+1)))^L\n\n  In order to compute I1 and I2, one can consider to\n    1. use an asymptotic bound, which recovers the advance composition theorem;\n    2. use the closed formula (like GaussianMomentsAccountant);\n    3. use numerical integration or random sample estimation.\n\n  Dependent on the distribution, we can often obtain a tigher estimation on\n  the moments and hence a more accurate estimation of the privacy loss than\n  obtained using generic composition theorems.\n\n  \"\"\"\n\n  __metaclass__ = abc.ABCMeta\n\n  def __init__(self, total_examples, moment_orders=32):\n    \"\"\"Initialize a MomentsAccountant.\n\n    Args:\n      total_examples: total number of examples.\n      moment_orders: the order of moments to keep.\n    \"\"\"\n\n    assert total_examples > 0\n    self._total_examples = total_examples\n    self._moment_orders = (moment_orders\n                           if isinstance(moment_orders, (list, tuple))\n                           else range(1, moment_orders + 1))\n    self._max_moment_order = max(self._moment_orders)\n    assert self._max_moment_order < 100, \"The moment order is too large.\"\n    self._log_moments = [tf.Variable(numpy.float64(0.0),\n                                     trainable=False,\n                                     name=(\"log_moments-%d\" % moment_order))\n                         for moment_order in self._moment_orders]\n\n  
@abc.abstractmethod\n  def _compute_log_moment(self, sigma, q, moment_order):\n    \"\"\"Compute high moment of privacy loss.\n\n    Args:\n      sigma: the noise sigma, in the multiples of the sensitivity.\n      q: the sampling ratio.\n      moment_order: the order of moment.\n    Returns:\n      log E[exp(moment_order * X)]\n    \"\"\"\n    pass\n\n  def accumulate_privacy_spending(self, unused_eps_delta,\n                                  sigma, num_examples):\n    \"\"\"Accumulate privacy spending.\n\n    In particular, accounts for privacy spending when we assume there\n    are num_examples, and we are releasing the vector\n    (sum_{i=1}^{num_examples} x_i) + Normal(0, stddev=l2norm_bound*sigma)\n    where l2norm_bound is the maximum l2_norm of each example x_i, and\n    the num_examples have been randomly selected out of a pool of\n    self.total_examples.\n\n    Args:\n      unused_eps_delta: EpsDelta pair which can be tensors. Unused\n        in this accountant.\n      sigma: the noise sigma, in the multiples of the sensitivity (that is,\n        if the l2norm sensitivity is k, then the caller must have added\n        Gaussian noise with stddev=k*sigma to the result of the query).\n      num_examples: the number of examples involved.\n    Returns:\n      a TensorFlow operation for updating the privacy spending.\n    \"\"\"\n    q = tf.cast(num_examples, tf.float64) * 1.0 / self._total_examples\n\n    moments_accum_ops = []\n    for i in range(len(self._log_moments)):\n      moment = self._compute_log_moment(sigma, q, self._moment_orders[i])\n      moments_accum_ops.append(tf.assign_add(self._log_moments[i], moment))\n    return tf.group(*moments_accum_ops)\n\n  def _compute_delta(self, log_moments, eps):\n    \"\"\"Compute delta for given log_moments and eps.\n\n    Args:\n      log_moments: the log moments of privacy loss, in the form of pairs\n        of (moment_order, log_moment)\n      eps: the target epsilon.\n    Returns:\n      delta\n    \"\"\"\n  
  min_delta = 1.0\n    for moment_order, log_moment in log_moments:\n      if math.isinf(log_moment) or math.isnan(log_moment):\n        sys.stderr.write(\"The %d-th order is inf or Nan\\n\" % moment_order)\n        continue\n      if log_moment < moment_order * eps:\n        min_delta = min(min_delta,\n                        math.exp(log_moment - moment_order * eps))\n    return min_delta\n\n  def _compute_eps(self, log_moments, delta):\n    min_eps = float(\"inf\")\n    for moment_order, log_moment in log_moments:\n      if math.isinf(log_moment) or math.isnan(log_moment):\n        sys.stderr.write(\"The %d-th order is inf or Nan\\n\" % moment_order)\n        continue\n      min_eps = min(min_eps, (log_moment - math.log(delta)) / moment_order)\n    return min_eps\n\n  def get_privacy_spent(self, sess, target_eps=None, target_deltas=None):\n    \"\"\"Compute privacy spending in (e, d)-DP form for a single or list of eps.\n\n    Args:\n      sess: the session to run the tensor.\n      target_eps: a list of target epsilon's for which we would like to\n        compute corresponding delta value.\n      target_deltas: a list of target deltas for which we would like to\n        compute the corresponding eps value. 
Caller must specify\n        either target_eps or target_delta.\n    Returns:\n      A list of EpsDelta pairs.\n    \"\"\"\n    assert (target_eps is None) ^ (target_deltas is None)\n    eps_deltas = []\n    log_moments = sess.run(self._log_moments)\n    log_moments_with_order = numpy.array(list(zip(self._moment_orders, log_moments)))\n    if target_eps is not None:\n      for eps in target_eps:\n        delta = self._compute_delta(log_moments_with_order, eps)\n        eps_deltas.append(EpsDelta(eps, delta))\n    else:\n      assert target_deltas\n      for delta in target_deltas:\n        eps_deltas.append(\n            EpsDelta(self._compute_eps(log_moments_with_order, delta), delta))\n    return eps_deltas\n\n\nclass GaussianMomentsAccountant(MomentsAccountant):\n  \"\"\"MomentsAccountant which assumes Gaussian noise.\n\n  GaussianMomentsAccountant assumes the noise added is centered Gaussian\n  noise N(0, sigma^2 I). In this case, we can compute the differential moments\n  accurately using a formula.\n\n  For asymptotic bound, for Gaussian noise with variance sigma^2, we can show\n  for L < sigma^2,  q L < sigma,\n    log E[exp(L X)] = O(q^2 L^2 / sigma^2).\n  Using this we derive that for training T epoches, with batch ratio q,\n  the Gaussian mechanism with variance sigma^2 (with q < 1/sigma) is (e, d)\n  private for d = exp(T/q q^2 L^2 / sigma^2 - L e). Setting L = sigma^2,\n  Tq = e/2, the mechanism is (e, exp(-e sigma^2/2))-DP. Equivalently, the\n  mechanism is (e, d)-DP if sigma = sqrt{2 log(1/d)}/e, q < 1/sigma,\n  and T < e/(2q). This bound is better than the bound obtained using general\n  composition theorems, by an Omega(sqrt{log k}) factor on epsilon, if we run\n  k steps. Since we use direct estimate, the obtained privacy bound has tight\n  constant.\n\n  For GaussianMomentAccountant, it suffices to compute I1, as I1 >= I2,\n  which reduce to computing E(P(x+s)/P(x+s-1) - 1)^i for s = 0 and 1. 
In the\n  companion gaussian_moments.py file, we supply procedure for computing both\n  I1 and I2 (the computation of I2 is through multi-precision integration\n  package). It can be verified that indeed I1 >= I2 for wide range of parameters\n  we have tried, though at the moment we are unable to prove this claim.\n\n  We recommend that when using this accountant, users independently verify\n  using gaussian_moments.py that for their parameters, I1 is indeed larger\n  than I2. This can be done by following the instructions in\n  gaussian_moments.py.\n  \"\"\"\n\n  def __init__(self, total_examples, moment_orders=32):\n    \"\"\"Initialization.\n\n    Args:\n      total_examples: total number of examples.\n      moment_orders: the order of moments to keep.\n    \"\"\"\n    super(self.__class__, self).__init__(total_examples, moment_orders)\n    self._binomial_table = utils.GenerateBinomialTable(self._max_moment_order)\n\n  def _differential_moments(self, sigma, s, t):\n    \"\"\"Compute 0 to t-th differential moments for Gaussian variable.\n\n        E[(P(x+s)/P(x+s-1)-1)^t]\n      = sum_{i=0}^t (t choose i) (-1)^{t-i} E[(P(x+s)/P(x+s-1))^i]\n      = sum_{i=0}^t (t choose i) (-1)^{t-i} E[exp(-i*(2*x+2*s-1)/(2*sigma^2))]\n      = sum_{i=0}^t (t choose i) (-1)^{t-i} exp(i(i+1-2*s)/(2 sigma^2))\n    Args:\n      sigma: the noise sigma, in the multiples of the sensitivity.\n      s: the shift.\n      t: 0 to t-th moment.\n    Returns:\n      0 to t-th moment as a tensor of shape [t+1].\n    \"\"\"\n    assert t <= self._max_moment_order, (\"The order of %d is out \"\n                                         \"of the upper bound %d.\"\n                                         % (t, self._max_moment_order))\n    binomial = tf.slice(self._binomial_table, [0, 0],\n                        [t + 1, t + 1])\n    signs = numpy.zeros((t + 1, t + 1), dtype=numpy.float64)\n    for i in range(t + 1):\n      for j in range(t + 1):\n        signs[i, j] = 1.0 - 2 * ((i - j) % 2)\n    
exponents = tf.constant([j * (j + 1.0 - 2.0 * s) / (2.0 * sigma * sigma)\n                             for j in range(t + 1)], dtype=tf.float64)\n    # x[i, j] = binomial[i, j] * signs[i, j] = (i choose j) * (-1)^{i-j}\n    x = tf.multiply(binomial, signs)\n    # y[i, j] = x[i, j] * exp(exponents[j])\n    #         = (i choose j) * (-1)^{i-j} * exp(j(j-1)/(2 sigma^2))\n    # Note: this computation is done by broadcasting pointwise multiplication\n    # between [t+1, t+1] tensor and [t+1] tensor.\n    y = tf.multiply(x, tf.exp(exponents))\n    # z[i] = sum_j y[i, j]\n    #      = sum_j (i choose j) * (-1)^{i-j} * exp(j(j-1)/(2 sigma^2))\n    z = tf.reduce_sum(y, 1)\n    return z\n\n  def _compute_log_moment(self, sigma, q, moment_order):\n    \"\"\"Compute high moment of privacy loss.\n\n    Args:\n      sigma: the noise sigma, in the multiples of the sensitivity.\n      q: the sampling ratio.\n      moment_order: the order of moment.\n    Returns:\n      log E[exp(moment_order * X)]\n    \"\"\"\n    assert moment_order <= self._max_moment_order, (\"The order of %d is out \"\n                                                    \"of the upper bound %d.\"\n                                                    % (moment_order,\n                                                       self._max_moment_order))\n    binomial_table = tf.slice(self._binomial_table, [moment_order, 0],\n                              [1, moment_order + 1])\n    # qs = [1 q q^2 ... q^L] = exp([0 1 2 ... 
L] * log(q))\n    qs = tf.exp(tf.constant([i * 1.0 for i in range(moment_order + 1)],\n                            dtype=tf.float64) * tf.cast(\n                                tf.log(q), dtype=tf.float64))\n    moments0 = self._differential_moments(sigma, 0.0, moment_order)\n    term0 = tf.reduce_sum(binomial_table * qs * moments0)\n    moments1 = self._differential_moments(sigma, 1.0, moment_order)\n    term1 = tf.reduce_sum(binomial_table * qs * moments1)\n    return tf.squeeze(tf.log(tf.cast(q * term0 + (1.0 - q) * term1,\n                                     tf.float64)))\n\n\nclass DummyAccountant(object):\n  \"\"\"An accountant that does no accounting.\"\"\"\n\n  def accumulate_privacy_spending(self, *unused_args):\n    return tf.no_op()\n\n  def get_privacy_spent(self, unused_sess, **unused_kwargs):\n    return [EpsDelta(numpy.inf, 1.0)]\n"
  },
  {
    "path": "eugenium_mmd.py",
    "content": "'''\nCode taken from: https://github.com/eugenium/mmd\n(modified slightly for efficiency/PEP by Stephanie Hyland)\n\nPython implementation of MMD and Covariance estimates for Relative MMD\n\nSome code is based on code from Vincent Van Asch \nwhich is based on matlab code from Arthur Gretton\n\n\nEugene Belilovsky\neugene.belilovsky@inria.fr\n'''\nimport numpy as np\nimport scipy as sp\nfrom numpy import sqrt\nfrom sklearn.metrics.pairwise import rbf_kernel\nfrom functools import partial\nimport pdb\n\ndef my_kernel(X, Y, sigma):\n    gamma = 1 / (2 * sigma**2)\n    if len(X.shape) == 2:\n        X_sqnorms = np.einsum('...i,...i', X, X)\n        Y_sqnorms = np.einsum('...i,...i', Y, Y)\n        XY = np.einsum('ia,ja', X, Y)\n    elif len(X.shape) == 3:\n        X_sqnorms = np.einsum('...ij,...ij', X, X)\n        Y_sqnorms = np.einsum('...ij,...ij', Y, Y)\n        XY = np.einsum('iab,jab', X, Y)\n    else:\n        pdb.set_trace()\n    Kxy = np.exp(-gamma*(X_sqnorms.reshape(-1, 1) - 2*XY + Y_sqnorms.reshape(1, -1)))\n    return Kxy\n\ndef MMD_3_Sample_Test(X, Y, Z, sigma=-1, SelectSigma=True, computeMMDs=False):\n    '''Performs the relative MMD test which returns a test statistic for whether Y is closer to X or than Z.\n    See http://arxiv.org/pdf/1511.04581.pdf\n    The bandwith heuristic is based on the median heuristic (see Smola,Gretton).\n    '''\n    if(sigma<0):\n        #Similar heuristics\n        if SelectSigma:\n            siz=np.min((1000, X.shape[0]))\n            sigma1=kernelwidthPair(X[0:siz], Y[0:siz]);\n            sigma2=kernelwidthPair(X[0:siz], Z[0:siz]);\n            sigma=(sigma1+sigma2)/2.\n        else:\n            siz=np.min((1000, X.shape[0]*3))\n            Zem=np.r_[X[0:siz/3], Y[0:siz/3], Z[0:siz/3]]\n            sigma=kernelwidth(Zem);\n\n    #kernel = partial(rbf_kernel, gamma=1.0/(sigma**2))\n    kernel = partial(my_kernel, sigma=sigma)\n    #kernel = partial(grbf, sigma=sigma)\n\n    Kyy = kernel(Y, Y)\n    Kzz = 
kernel(Z, Z)\n    Kxy = kernel(X, Y)\n    Kxz = kernel(X, Z)\n\n    Kyynd = Kyy-np.diag(np.diagonal(Kyy))\n    Kzznd = Kzz-np.diag(np.diagonal(Kzz))\n    m = Kxy.shape[0];\n    n = Kyy.shape[0];\n    r = Kzz.shape[0];    \n\n    \n    u_yy=np.sum(Kyynd)*( 1./(n*(n-1)) )\n    u_zz=np.sum(Kzznd)*( 1./(r*(r-1)) )\n    u_xy=np.sum(Kxy)/(m*n)\n    u_xz=np.sum(Kxz)/(m*r)\n    #Compute the test statistic\n    t=u_yy - 2.*u_xy - (u_zz-2.*u_xz)\n    Diff_Var, Diff_Var_z2, data=MMD_Diff_Var(Kyy, Kzz, Kxy, Kxz)\n\n    pvalue=sp.stats.norm.cdf(-t/np.sqrt((Diff_Var)))\n  #  pvalue_z2=sp.stats.norm.cdf(-t/np.sqrt((Diff_Var_z2)))\n    tstat=t/sqrt(Diff_Var)\n    \n    if(computeMMDs):\n         Kxx = kernel(X, X)\n         Kxxnd = Kxx-np.diag(np.diagonal(Kxx))\n         u_xx=np.sum(Kxxnd)*( 1./(m*(m-1)) )\n         MMDXY=u_xx+u_yy-2.*u_xy\n         MMDXZ=u_xx+u_zz-2.*u_xz\n    else:\n         MMDXY=None\n         MMDXZ=None\n    return pvalue, tstat, sigma, MMDXY, MMDXZ\n    \ndef MMD_Diff_Var(Kyy, Kzz, Kxy, Kxz):\n    '''\n    Compute the variance of the difference statistic MMDXY-MMDXZ\n    See http://arxiv.org/pdf/1511.04581.pdf Appendix for derivations\n    '''\n    m = Kxy.shape[0];\n    n = Kyy.shape[0];\n    r = Kzz.shape[0];\n    \n    \n    Kyynd = Kyy-np.diag(np.diagonal(Kyy));\n    Kzznd = Kzz-np.diag(np.diagonal(Kzz));\n    \n    u_yy=np.sum(Kyynd)*( 1./(n*(n-1)) );\n    u_zz=np.sum(Kzznd)*( 1./(r*(r-1)) );\n    u_xy=np.sum(Kxy)/(m*n);\n    u_xz=np.sum(Kxz)/(m*r);\n    \n    #compute zeta1\n    t1=(1./n**3)*np.sum(Kyynd.T.dot(Kyynd))-u_yy**2;\n    t2=(1./(n**2*m))*np.sum(Kxy.T.dot(Kxy))-u_xy**2;\n    t3=(1./(n*m**2))*np.sum(Kxy.dot(Kxy.T))-u_xy**2;\n    t4=(1./r**3)*np.sum(Kzznd.T.dot(Kzznd))-u_zz**2;\n    t5=(1./(r*m**2))*np.sum(Kxz.dot(Kxz.T))-u_xz**2;\n    t6=(1./(r**2*m))*np.sum(Kxz.T.dot(Kxz))-u_xz**2;\n    t7=(1./(n**2*m))*np.sum(Kyynd.dot(Kxy.T))-u_yy*u_xy;\n    t8=(1./(n*m*r))*np.sum(Kxy.T.dot(Kxz))-u_xz*u_xy;\n    
t9=(1./(r**2*m))*np.sum(Kzznd.dot(Kxz.T))-u_zz*u_xz;\n    \n    zeta1=(t1+t2+t3+t4+t5+t6-2.*(t7+t8+t9)); \n    \n    zeta2=(1/m/(m-1))*np.sum((Kyynd-Kzznd-Kxy.T-Kxy+Kxz+Kxz.T)**2)-(u_yy - 2.*u_xy - (u_zz-2.*u_xz))**2;\n    \n    \n    data=dict({'t1':t1,\n               't2':t2,\n               't3':t3,\n               't4':t4,\n               't5':t5,\n               't6':t6,\n               't7':t7,\n               't8':t8,\n               't9':t9,\n               'zeta1':zeta1,\n               'zeta2':zeta2,\n                })\n    #TODO more precise version for zeta2 \n    #    xx=(1/m^2)*sum(sum(Kxxnd.*Kxxnd))-u_xx^2;\n    # yy=(1/n^2)*sum(sum(Kyynd.*Kyynd))-u_yy^2;\n    #xy=(1/(n*m))*sum(sum(Kxy.*Kxy))-u_xy^2;\n    #xxy=(1/(n*m^2))*sum(sum(Kxxnd*Kxy))-u_xx*u_xy;\n    #yyx=(1/(n^2*m))*sum(sum(Kyynd*Kxy'))-u_yy*u_xy;\n    #zeta2=(xx+yy+xy+xy-2*(xxy+xxy +yyx+yyx))\n    \n    \n    Var=(4.*(m-2)/(m*(m-1)))*zeta1;\n    Var_z2=Var+(2./(m*(m-1)))*zeta2;\n\n    return Var, Var_z2, data\ndef grbf(x1, x2, sigma):\n    '''Calculates the Gaussian radial base function kernel'''\n    n, nfeatures = x1.shape\n    m, mfeatures = x2.shape\n    \n    k1 = np.sum((x1*x1), 1)\n    q = np.tile(k1, (m, 1)).transpose()\n    del k1\n    \n    k2 = np.sum((x2*x2), 1)\n    r = np.tile(k2.T, (n, 1))\n    del k2\n    \n    h = q + r\n    del q, r\n    \n    # The norm\n    h = h - 2*np.dot(x1, x2.transpose())\n    h = np.array(h, dtype=float)\n    \n    return np.exp(-1.*h/(2.*pow(sigma, 2)))\n    \n    \ndef kernelwidthPair(x1, x2):\n    '''Implementation of the median heuristic. 
See Gretton 2012\n       Pick sigma such that the exponent of exp(- ||x-y|| / (2*sigma2)),\n       in other words ||x-y|| / (2*sigma2),  equals 1 for the median distance x\n       and y of all distances between points from both data sets X and Y.\n    '''\n    n, nfeatures = x1.shape\n    m, mfeatures = x2.shape\n    \n    k1 = np.sum((x1*x1), 1)\n    q = np.tile(k1, (m, 1)).transpose()\n    del k1\n    \n    k2 = np.sum((x2*x2), 1)\n    r = np.tile(k2, (n, 1))\n    del k2\n    \n    h= q + r\n    del q, r\n    \n    # The norm\n    h = h - 2*np.dot(x1, x2.transpose())\n    h = np.array(h, dtype=float)\n    \n    mdist = np.median([i for i in h.flat if i])\n    \n    sigma = sqrt(mdist/2.0)\n    if not sigma: sigma = 1\n    \n    return sigma\ndef kernelwidth(Zmed):\n    '''Alternative median heuristic when we cant partition the points\n    '''\n    m= Zmed.shape[0]\n    k1 = np.expand_dims(np.sum((Zmed*Zmed), axis=1), 1)\n    q = np.kron(np.ones((1, m)), k1)\n    r = np.kron(np.ones((m, 1)), k1.T)\n    del k1\n    \n    h= q + r\n    del q, r\n    \n    # The norm\n    h = h - 2.*Zmed.dot(Zmed.T)\n    h = np.array(h, dtype=float)\n    \n    mdist = np.median([i for i in h.flat if i])\n    \n    sigma = sqrt(mdist/2.0)\n    if not sigma: sigma = 1\n    \n    return sigma\n    \n    \n\ndef MMD_unbiased(Kxx, Kyy, Kxy):\n#The estimate when distribution of x is not equal to y\n    m = Kxx.shape[0]\n    n = Kyy.shape[0]\n    \n    t1 = (1./(m*(m-1)))*np.sum(Kxx - np.diag(np.diagonal(Kxx)))\n    t2 = (2./(m*n)) * np.sum(Kxy)\n    t3 = (1./(n*(n-1)))* np.sum(Kyy - np.diag(np.diagonal(Kyy)))\n    \n    MMDsquared = (t1-t2+t3)\n    \n    return MMDsquared\n"
  },
  {
    "path": "eval.py",
    "content": "#!/usr/bin/env ipython\n# Evaluation of models\n#\n\nimport json\nimport pdb\nimport numpy as np\nimport pandas as pd\nfrom eugenium_mmd import MMD_3_Sample_Test\nfrom scipy.stats import ks_2samp\nimport mmd\nfrom sklearn.svm import SVC\nfrom sklearn.metrics import classification_report, precision_recall_fscore_support, accuracy_score, roc_auc_score, average_precision_score\nfrom sklearn.ensemble import RandomForestClassifier\nimport sklearn\nimport matplotlib\nmatplotlib.use('Agg')\nimport matplotlib.pyplot as plt\n\n# for keras\nimport keras\nfrom keras.models import Sequential\nfrom keras.layers import Dense, Dropout, Flatten\nfrom keras.layers import Conv2D, MaxPooling2D\nfrom keras.backend import clear_session\n\nimport model\nimport data_utils\nimport plotting\n\nimport pickle\n\ndef assert_same_data(A, B):\n    # case 0, both loaded\n    if A['data'] == 'load' and B['data'] == 'load':\n        assert A['data_load_from'] == B['data_load_from']\n        data_path = './experiments/data/' + A['data_load_from']\n    elif A['data'] == 'load' and (not B['data'] == 'load'):\n        assert A['data_load_from'] == B['identifier']\n        data_path = './experiments/data/' + A['data_load_from']\n    elif (not A['data'] == 'load') and B['data'] == 'load':\n        assert B['data_load_from'] == A['identifier']\n        data_path = './experiments/data/' + A['identifier']\n    else:\n        raise ValueError(A['data'], B['data'])\n    return data_path\n\ndef model_memorisation(identifier, epoch, max_samples=2000, tstr=False):\n    \"\"\"\n    Compare samples from a model against training set and validation set in mmd\n    \"\"\"\n    if tstr:\n        print('Loading data from TSTR experiment (not sampling from model)')\n        # load pre-generated samples\n        synth_data = np.load('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy').item()\n        model_samples = synth_data['samples']\n        synth_labels = synth_data['labels']\n      
  # load real data used in that experiment\n        real_data = np.load('./experiments/data/' + identifier + '.data.npy').item()\n        real_samples = real_data['samples']\n        train = real_samples['train']\n        test = real_samples['test']\n        n_samples = test.shape[0]\n        if model_samples.shape[0] > n_samples:\n            model_samples = np.random.permutation(model_samples)[:n_samples]\n        print('Data loaded successfully!')\n    else:\n        if identifier == 'cristobal_eICU':\n            model_samples = pickle.load(open('REDACTED', 'rb'))\n            samples, labels = data_utils.eICU_task()\n            train = samples['train'].reshape(-1,16,4)\n            vali = samples['vali'].reshape(-1,16,4)\n            test = samples['test'].reshape(-1,16,4)\n            #train_targets = labels['train']\n            #vali_targets = labels['vali']\n            #test_targets = labels['test']\n            train, vali, test = data_utils.scale_data(train, vali, test)\n            n_samples = test.shape[0]\n            if n_samples > max_samples:\n                n_samples = max_samples\n                test = np.random.permutation(test)[:n_samples]\n            if model_samples.shape[0] > n_samples:\n                model_samples = np.random.permutation(model_samples)[:n_samples]\n        elif identifier == 'cristobal_MNIST':\n            the_dir = 'REDACTED'\n            # pick a random one\n            which = np.random.choice(['NEW_OK_', '_r4', '_r5', '_r6', '_r7'])\n            model_samples, model_labels = pickle.load(open(the_dir + 'synth_mnist_minist_cdgan_1_2_100_multivar_14_nolr_rdim3_0_2_' + which + '_190.pk', 'rb'))\n            # get test and train...\n            # (generated with fixed seed...)\n            mnist_resized_dim = 14\n            samples, labels = data_utils.load_resized_mnist(mnist_resized_dim)\n            proportions = [0.6, 0.2, 0.2]\n            train, vali, test, labels_split = data_utils.split(samples, 
labels=labels, random_seed=1, proportions=proportions)\n            np.random.seed()\n            train = train.reshape(-1, 14, 14)\n            test = test.reshape(-1, 14, 14)\n            vali = vali.reshape(-1, 14, 14)\n            n_samples = test.shape[0]\n            if n_samples > max_samples:\n                n_samples = max_samples\n                test = np.random.permutation(test)[:n_samples]\n            if model_samples.shape[0] > n_samples:\n                model_samples = np.random.permutation(model_samples)[:n_samples]\n        else:\n            settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r'))\n            # get the test, train sets\n            data = np.load('./experiments/data/' + identifier + '.data.npy').item()\n            train = data['samples']['train']\n            test = data['samples']['test']\n            n_samples = test.shape[0]\n            if n_samples > max_samples:\n                n_samples = max_samples\n                test = np.random.permutation(test)[:n_samples]\n            model_samples = model.sample_trained_model(settings, epoch, n_samples)\n    all_samples = np.vstack([train, test, model_samples])\n    heuristic_sigma = mmd.median_pairwise_distance(all_samples)\n    print('heuristic sigma:', heuristic_sigma)\n    pvalue, tstat, sigma, MMDXY, MMDXZ = MMD_3_Sample_Test(model_samples, test, np.random.permutation(train)[:n_samples], sigma=heuristic_sigma, computeMMDs=False)\n    #pvalue, tstat, sigma, MMDXY, MMDXZ = MMD_3_Sample_Test(model_samples, np.random.permutation(train)[:n_samples], test, sigma=heuristic_sigma, computeMMDs=False)\n#    if pvalue < 0.05:\n#        print('At confidence level 0.05, we reject the null hypothesis that MMDXY <= MMDXZ, and conclude that the test data has a smaller MMD with the true data than the generated data')\n        # the function takes (X, Y, Z) as its first arguments, it's testing if MMDXY (i.e. 
MMD between model and train) is less than MMDXZ (MMd between model and test)\n#    else:\n#        print('We have failed to reject the null hypothesis that MMDXY <= MMDXZ, and cannot conclu#de that the test data has a smaller MMD with the true data than the generated data')\n    return pvalue, tstat, sigma\n\ndef model_comparison(identifier_A, identifier_B, epoch_A=99, epoch_B=99):\n    \"\"\"\n    Compare two models using relative MMD test\n    \"\"\"\n    # make sure they used the same data\n    settings_A = json.load(open('./experiments/settings/' + identifier_A + '.txt', 'r'))\n    settings_B = json.load(open('./experiments/settings/' + identifier_B + '.txt', 'r'))\n    data_path = assert_same_data(settings_A, settings_B)\n    # now load the data\n    data = np.load(data_path + '.data.npy').item()['samples']['vali']\n    n_samples = data.shape[0]\n    A_samples = model.sample_trained_model(settings_A, epoch_A, n_samples)\n    B_samples = model.sample_trained_model(settings_B, epoch_B, n_samples)\n    # do the comparison\n    # TODO: support multiple signals\n    ## some notes about this test:\n    ## MMD_3_Sample_Test(X, Y, Z) tests the hypothesis that Px is closer to Pz than Py\n    ## that is, test the null hypothesis H0:\n    ##   MMD(F, Px, Py) <= MMD(F, Px, Pz)\n    ## versus the alternate hypothesis:\n    ##   MMD(F, Px, Py) > MMD(F, Px, Pz)\n    ## at significance level that we select later (just the threshold on the p-value)\n    pvalue, tstat, sigma, MMDXY, MMDXZ = MMD_3_Sample_Test(data[:, :, 0], A_samples[:, :, 0], B_samples[:, :, 0], computeMMDs=True)\n    print(pvalue, tstat, sigma)\n    if pvalue < 0.05:\n        print('At confidence level 0.05, we reject the null hypothesis that MMDXY <= MMDXZ, and conclude that', identifier_B, 'has a smaller MMD with the true data than', identifier_A)\n    else:\n        print('We have failed to reject the null hypothesis that MMDXY <= MMDXZ, and cannot conclude that', identifier_B, 'has a smaller MMD with the 
true data than', identifier_A)\n    return pvalue, tstat, sigma, MMDXY, MMDXZ\n\n# --- to do with reconstruction --- #\n\ndef get_reconstruction_errors(identifier, epoch, g_tolerance=0.05, max_samples=1000, rerun=False, tstr=False):\n    \"\"\"\n    Get the reconstruction error of every point in the training set of a given\n    experiment.\n    \"\"\"\n    settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r'))\n    if settings['data_load_from']:\n        data_dict = np.load('./experiments/data/' + settings['data_load_from'] + '.data.npy').item()\n    else:\n        data_dict = np.load('./experiments/data/' + identifier + '.data.npy').item()\n    samples = data_dict['samples']\n    train = samples['train']\n    vali = samples['vali']\n    test = samples['test']\n    labels = data_dict['labels']\n    train_labels, test_labels, synth_labels, vali_labels = None, None, None, None\n    try:\n        if rerun:\n            raise FileNotFoundError\n        errors = np.load('./experiments/eval/' + identifier + '_' + str(epoch) + '_' + str(g_tolerance) + '.reconstruction_errors.npy').item()\n        train_errors = errors['train']\n        test_errors = errors['test']\n        generated_errors = errors['generated']\n        noisy_errors = errors['noisy']\n        print('Loaded precomputed errors')\n    except FileNotFoundError:\n        if tstr:\n            synth_data = np.load('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy').item()\n            generated = synth_data['samples']\n            synth_labels = synth_data['labels']\n            train_labels = labels['train']\n            test_labels = labels['test']\n            vali_labels = labels['vali']\n        else:\n            # generate new data\n            n_eval = 500\n            # generate \"easy\" samples from the distribution\n            generated = model.sample_trained_model(settings, epoch, n_eval)\n            # generate \"hard' random samples, not from train/test 
distribution\n            # TODO: use original validation examples, add noise etc.\n        ##    random_samples = np.random.normal(size=generated.shape)\n        #    random_samples -= np.mean(random_samples, axis=0) \n        #    random_samples += np.mean(vali, axis=0)\n        #    random_samples /= np.std(random_samples, axis=0)\n        #    random_samples *= np.std(vali, axis=0)\n\n        # get all the errors\n        print('Getting reconstruction errors on train set')\n        if train.shape[0] > max_samples:\n            index_subset = np.random.permutation(train.shape[0])[:max_samples]\n            train = train[index_subset]\n            if train_labels is not None:\n                train_labels = train_labels[index_subset]\n        train_errors = error_per_sample(identifier, epoch, train, n_rep=5, g_tolerance=g_tolerance, C_samples=train_labels)\n        print('Getting reconstruction errors on test set')\n        if test.shape[0] > max_samples:\n            index_subset = np.random.permutation(test.shape[0])[:max_samples]\n            test = test[index_subset]\n            if test_labels is not None:\n                test_labels = test_labels[index_subset]\n        test_errors = error_per_sample(identifier, epoch, test, n_rep=5, g_tolerance=g_tolerance, C_samples=test_labels)\n        D_test, p_test = ks_2samp(train_errors, test_errors)\n        print('KS statistic and p-value for train v. test erors:', D_test, p_test)\n        pdb.set_trace()\n        print('Getting reconstruction errors on generated set')\n        generated_errors = error_per_sample(identifier, epoch, generated, n_rep=5, g_tolerance=g_tolerance, C_samples=synth_labels)\n        D_gen, p_gen = ks_2samp(generated_errors, train_errors)\n        print('KS statistic and p-value for train v. gen erors:', D_gen, p_gen)\n        D_gentest, p_gentest = ks_2samp(generated_errors, test_errors)\n        print('KS statistic and p-value for gen v. 
test erors:', D_gentest, p_gentest)\n#        print('Getting reconstruction errors on noisy set')\n#        alpha = 0.5\n#        noisy_samples = alpha*vali + (1-alpha)*np.random.permutation(vali)\n#        noisy_errors = error_per_sample(identifier, epoch, noisy_samples, n_rep=5, g_tolerance=g_tolerance, C_samples=vali_labels)\n        noisy_errors = None\n        # save!\n        errors = {'train': train_errors, 'test': test_errors, 'generated': generated_errors, 'noisy': noisy_errors}\n        np.save('./experiments/eval/' + identifier + '_' + str(epoch) + '_' + str(g_tolerance) + '.reconstruction_errors.npy', errors)\n    # do two-sample Kolomogorov-Smirnov test for equality\n    D_test, p_test = ks_2samp(train_errors, test_errors)\n    print('KS statistic and p-value for train v. test erors:', D_test, p_test)\n    D_gen, p_gen = ks_2samp(generated_errors, train_errors)\n    print('KS statistic and p-value for train v. gen erors:', D_gen, p_gen)\n    D_gentest, p_gentest = ks_2samp(generated_errors, test_errors)\n    print('KS statistic and p-value for gen v. 
test erors:', D_gentest, p_gentest)\n    # visualise distribution of errors for train and test\n    plotting.reconstruction_errors(identifier + '_' + str(epoch) + '_' + str(g_tolerance), train_errors, test_errors, generated_errors, noisy_errors)\n    # visualise the \"hardest\" and \"easiest\" samples from train\n    ranking_train = np.argsort(train_errors)\n    easiest_train = ranking_train[:6]\n    hardest_train = ranking_train[-6:]\n    plotting.save_plot_sample(train[easiest_train], epoch, identifier + '_easytrain', n_samples=6, num_epochs=None, ncol=2)\n    plotting.save_plot_sample(train[hardest_train], epoch, identifier + '_hardtrain', n_samples=6, num_epochs=None, ncol=2)\n    # visualise the \"hardest\" and \"easiest\" samples from random\n#    ranking_random = np.argsort(noisy_errors)\n#    easiest_random = ranking_random[:6]\n#    hardest_random = ranking_random[-6:]\n#    plotting.save_plot_sample(random_samples[easiest_random], epoch, identifier + '_easyrandom', n_samples=6, num_epochs=None, ncol=2)\n#    plotting.save_plot_sample(random_samples[hardest_random], epoch, identifier + '_hardrandom', n_samples=6, num_epochs=None, ncol=2)\n    return True\n\ndef error_per_sample(identifier, epoch, samples, n_rep=3, n_iter=None, g_tolerance=0.025, use_min=True, C_samples=None):\n    \"\"\"\n    Get (average over a few runs) of the reconstruction error per sample\n    \"\"\"\n    n_samples = samples.shape[0]\n    heuristic_sigma = np.float32(mmd.median_pairwise_distance(samples))\n    errors = np.zeros(shape=(n_samples, n_rep))\n    for rep in range(n_rep):\n        Z, rep_errors, sigma = model.invert(identifier, epoch, samples, n_iter=n_iter, heuristic_sigma=heuristic_sigma, g_tolerance=g_tolerance, C_samples=C_samples)\n        errors[:, rep] = rep_errors\n    # return min, or average?\n    if use_min:\n        errors = np.min(errors, axis=1)\n    else:\n        # use mean\n        errors = np.mean(errors, axis=1)\n    return errors\n\n# --- visualisation 
evaluation --- #\n\ndef view_digit(identifier, epoch, digit, n_samples=6):\n    \"\"\"\n    Generate a bunch of MNIST digits from a CGAN, view them\n    \"\"\"\n    settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r'))\n    if settings['one_hot']:\n        assert settings['max_val'] == 1\n        assert digit <= settings['cond_dim']\n        C_samples = np.zeros(shape=(n_samples, settings['cond_dim']))\n        C_samples[:, digit] = 1\n    else:\n        assert settings['cond_dim'] == 1\n        assert digit <= settings['max_val']\n        C_samples = np.array([digit]*n_samples).reshape(-1, 1)\n    digit_samples = model.sample_trained_model(settings, epoch, n_samples, Z_samples=None, cond_dim=settings['cond_dim'], C_samples=C_samples)\n    digit_samples = digit_samples.reshape(n_samples, -1, 1)\n    # visualise\n    plotting.save_mnist_plot_sample(digit_samples, digit, identifier + '_' + str(epoch) + '_digit_', n_samples)\n    return True\n\ndef view_interpolation(identifier, epoch, n_steps=6, input_samples=None, e_tolerance=0.01, sigma=3.29286853021):\n    \"\"\"\n    If samples: generate interpolation between real points\n    Else:\n        Sample two points in the latent space, view a linear interpolation between them.\n    \"\"\"\n    settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r'))\n    if input_samples is None:\n        # grab two trainng examples\n        data = np.load('./experiments/data/' + identifier + '.data.npy').item()\n        train = data['samples']['train']\n        input_samples = np.random.permutation(train)[:2]\n#        Z_sampleA, Z_sampleB = model.sample_Z(2, settings['seq_length'], settings['latent_dim'], \n#                                          settings['use_time'])\n        if sigma is None:\n            ## gotta get a sigma somehow\n            sigma = mmd.median_pairwise_distance(train)\n            print('Calcualted heuristic sigma from training data:', sigma)\n    Zs, error, 
_ = model.invert(settings, epoch, input_samples, e_tolerance=e_tolerance)\n    Z_sampleA, Z_sampleB = Zs\n    Z_samples = plotting.interpolate(Z_sampleA, Z_sampleB, n_steps=n_steps)\n    samples = model.sample_trained_model(settings, epoch, Z_samples.shape[0], Z_samples)\n    # get distances from generated samples to target samples\n    d_A, d_B = [], []\n    for sample in samples:\n        d_A.append(sample_distance(sample, samples[0], sigma))\n        d_B.append(sample_distance(sample, samples[-1], sigma))\n    distances = pd.DataFrame({'dA': d_A, 'dB': d_B})\n    plotting.save_plot_interpolate(input_samples, samples, epoch, settings['identifier'] + '_epoch' + str(epoch), distances=distances, sigma=sigma)\n    return True\n\ndef view_latent_vary(identifier, epoch, n_steps=6):\n    settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r'))\n    Z_sample = model.sample_Z(1, settings['seq_length'], settings['latent_dim'], \n                                      settings['use_time'])[0]\n    samples_dim = []\n    for dim in range(settings['latent_dim']):\n        Z_samples_dim = plotting.vary_latent_dimension(Z_sample, dim, n_steps)\n        samples_dim.append(model.sample_trained_model(settings, epoch, Z_samples_dim.shape[0], Z_samples_dim))\n    plotting.save_plot_vary_dimension(samples_dim, epoch, settings['identifier'] + '_varydim', n_dim=settings['latent_dim'])\n    return True\n\ndef view_reconstruction(identifier, epoch, real_samples, tolerance=1):\n    \"\"\"\n    Given a set of real samples, find the \"closest\" latent space points \n    corresponding to them, generate samples from these, visualise!\n    \"\"\"\n    settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r'))\n    Zs, error, sigma = model.invert(settings, epoch, real_samples, tolerance=tolerance)\n    plotting.visualise_latent(Zs[0], identifier+'_' + str(epoch) + '_0')\n    plotting.visualise_latent(Zs[1], identifier+'_' + str(epoch) + '_1')\n    
model_samples = model.sample_trained_model(settings, epoch, Zs.shape[0], Zs)\n    plotting.save_plot_reconstruct(real_samples, model_samples, settings['identifier'])\n    return True\n\ndef view_fixed(identifier, epoch, n_samples=6, dim=None):\n    \"\"\" What happens when we give the same point at each time step? \"\"\"\n    settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r'))\n    Z_samples = model.sample_Z(n_samples, settings['seq_length'], settings['latent_dim'], \n                                      settings['use_time'])\n    # now, propagate forward the value at time 0 (which time doesn't matter)\n    for i in range(1, settings['seq_length']):\n        if dim is None:\n            Z_samples[:, i, :] = Z_samples[:, 0, :]\n        else:\n            Z_samples[:, i, dim] = Z_samples[:, 0, dim]\n    # now generate\n    samples = model.sample_trained_model(settings, epoch, n_samples, Z_samples)\n    # now visualise\n    plotting.save_plot_sample(samples, epoch, identifier + '_fixed', n_samples)\n    return True\n\ndef view_params(identifier, epoch):\n    \"\"\" Visualise weight matrices in the GAN \"\"\"\n    settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r'))\n    parameters = model.load_parameters(identifier + '_' + str(epoch))\n    plotting.plot_parameters(parameters, identifier + '_' + str(epoch))\n    return True\n\n# --- to do with samples --- #\n\ndef sample_distance(sampleA, sampleB, sigma):\n    \"\"\"\n    I know this isn't the best distance measure, alright.\n    \"\"\"\n    # RBF!\n    gamma = 1 / (2 * sigma**2)\n    similarity = np.exp(-gamma*(np.linalg.norm(sampleA - sampleB)**2))\n    distance = 1 - similarity\n    return distance\n\n### --- TSTR ---- ###\n\ndef train_CNN(train_X, train_Y, vali_X, vali_Y, test_X):\n    \"\"\"\n    Train a CNN (code copied/adapted from Cristobal's mnist_keras_trts_0_2)\n\t(ONLY MNIST, ONLY 14x14)\n    (ONLY DIGITS UP TO 3)\n    \"\"\"\n    print('Training 
CNN!')\n    input_shape = (14,14,1)\n    batch_size = 128\n    num_classes = 3\n    epochs = 1000\n\n    m = Sequential()\n    m.add(Conv2D(16, kernel_size=(3, 3),\n                        activation='relu',\n                        input_shape=input_shape))\n    m.add(Conv2D(32, (3, 3), activation='relu'))\n    m.add(MaxPooling2D(pool_size=(2, 2)))\n    m.add(Dropout(0.25))\n    m.add(Flatten())\n    m.add(Dense(128, activation='relu'))\n    m.add(Dropout(0.5))\n    m.add(Dense(num_classes, activation='softmax'))\n\n    m.compile(loss=keras.losses.categorical_crossentropy,\n            optimizer=keras.optimizers.Adadelta(),\n            metrics=['accuracy'])\n\n    earlyStopping=keras.callbacks.EarlyStopping(monitor='val_loss', patience=0, verbose=1, mode='auto')\n    m.fit(np.expand_dims(train_X, axis=-1), train_Y,\n            batch_size=batch_size,\n            epochs=epochs,\n            verbose=1,\n            validation_data=(np.expand_dims(vali_X, axis=-1), vali_Y),\n            callbacks=[earlyStopping])\n    test_predictions = m.predict(np.expand_dims(test_X, axis=-1))\n    return test_predictions\n\ndef TSTR_mnist(identifier, epoch, generate=True, duplicate_synth=1, vali=True, CNN=False, reverse=False):\n    \"\"\"\n    Either load or generate synthetic training, real test data...\n    Load synthetic training, real test data, do multi-class SVM\n    (basically just this: http://scikit-learn.org/stable/auto_examples/classification/plot_digits_classification.html)\n\n    If reverse = True: do TRTS\n    \"\"\"\n    print('Running TSTR on', identifier, 'at epoch', epoch)\n    if vali:\n        test_set = 'vali'\n    else:\n        test_set = 'test'\n    if generate:\n        data = np.load('./experiments/data/' + identifier + '.data.npy').item()\n        samples = data['samples']\n        train_X = samples['train']\n        test_X = samples[test_set]\n        labels = data['labels']\n        train_Y = labels['train']\n        test_Y = labels[test_set]\n      
  # now sample from the model\n        synth_Y = np.tile(train_Y, [duplicate_synth, 1])\n        synth_X = model.sample_trained_model(identifier, epoch, num_samples=synth_Y.shape[0], C_samples=synth_Y)\n        # for use in TRTS\n        synth_testX = model.sample_trained_model(identifier, epoch, num_samples=test_Y.shape[0], C_samples=test_Y)\n        synth_data = {'samples': synth_X, 'labels': synth_Y, 'test_samples': synth_testX, 'test_labels': test_Y}\n        np.save('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy', synth_data)\n    else:\n        print('Loading synthetic data from pre-sampled model')\n        exp_data = np.load('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy').item()\n        test_X, test_Y = exp_data['test_data'], exp_data['test_labels']\n        train_X, train_Y = exp_data['train_data'], exp_data['train_labels']\n        synth_X, synth_Y = exp_data['synth_data'], exp_data['synth_labels']\n    if reverse:\n        which_setting = 'trts'\n        print('Swapping synthetic test set in for real, to do TRTS!')\n        test_X = synth_testX\n    else:\n        print('Doing normal TSTR')\n        which_setting = 'tstr'\n    # make classifier\n    if not CNN:\n        model_choice = 'RF'\n         # if multivariate, reshape\n        if len(test_X.shape) == 3:\n            test_X = test_X.reshape(test_X.shape[0], -1)\n        if len(train_X.shape) == 3:\n            train_X = train_X.reshape(train_X.shape[0], -1)\n        if len(synth_X.shape) == 3:\n            synth_X = synth_X.reshape(synth_X.shape[0], -1)\n        # if one hot, fix\n        if len(synth_Y.shape) > 1 and not synth_Y.shape[1] == 1:\n            synth_Y = np.argmax(synth_Y, axis=1)\n            train_Y = np.argmax(train_Y, axis=1)\n            test_Y = np.argmax(test_Y, axis=1)\n       # random forest\n        #synth_classifier = SVC(gamma=0.001)\n        #real_classifier = SVC(gamma=0.001)\n        synth_classifier = 
RandomForestClassifier(n_estimators=500)\n        real_classifier = RandomForestClassifier(n_estimators=500)\n        # fit\n        real_classifier.fit(train_X, train_Y)\n        synth_classifier.fit(synth_X, synth_Y)\n        # test on real\n        synth_predY = synth_classifier.predict(test_X)\n        real_predY = real_classifier.predict(test_X)\n    else:\n        model_choice = 'CNN'\n        synth_predY = train_CNN(synth_X, synth_Y, samples['vali'], labels['vali'], test_X)\n        clear_session()\n        real_predY = train_CNN(train_X, train_Y, samples['vali'], labels['vali'], test_X)\n        clear_session()\n        # CNN setting is all 'one-hot'\n        test_Y = np.argmax(test_Y, axis=1)\n        synth_predY = np.argmax(synth_predY, axis=1)\n        real_predY = np.argmax(real_predY, axis=1)\n    \n    # report on results\n    synth_prec, synth_recall, synth_f1, synth_support = precision_recall_fscore_support(test_Y, synth_predY, average='weighted')\n    synth_accuracy = accuracy_score(test_Y, synth_predY)\n    synth_auprc = 'NaN'\n    synth_auroc = 'NaN'\n    synth_scores = [synth_prec, synth_recall, synth_f1, synth_accuracy, synth_auprc, synth_auroc]\n    real_prec, real_recall, real_f1, real_support = precision_recall_fscore_support(test_Y, real_predY, average='weighted')\n    real_accuracy = accuracy_score(test_Y, real_predY)\n    real_auprc = 'NaN'\n    real_auroc = 'NaN'\n    real_scores = [real_prec, real_recall, real_f1, real_accuracy, real_auprc, real_auroc]\n    \n    all_scores = synth_scores + real_scores\n\n    if vali:\n        report_file = open('./experiments/tstr/vali.' 
+ which_setting + '_report.v3.csv', 'a')\n        report_file.write('mnist,' + identifier + ',' + model_choice + ',' + str(epoch) + ',' + ','.join(map(str, all_scores)) + '\\n')\n        report_file.close()\n    else:\n        report_file = open('./experiments/tstr/' + which_setting + '_report.v3.csv', 'a')\n        report_file.write('mnist,' + identifier + ',' + model_choice + ',' + str(epoch) + ',' + ','.join(map(str, all_scores)) + '\\n')\n        report_file.close()\n        # visualise results\n        try:\n            plotting.view_mnist_eval(identifier + '_' + str(epoch), train_X, train_Y, synth_X, synth_Y, test_X, test_Y, synth_predY, real_predY)\n        except ValueError:\n            print('PLOTTING ERROR')\n            pdb.set_trace()\n    print(classification_report(test_Y, synth_predY))\n    print(classification_report(test_Y, real_predY))\n    return synth_f1, real_f1\n\ndef TSTR_eICU(identifier, epoch, generate=True, vali=True, CNN=False, do_OR=False, duplicate_synth=1, reverse=False):\n    \"\"\"\n    \"\"\"\n    if vali:\n        test_set = 'vali'\n    else:\n        test_set = 'test'\n    data = np.load('./experiments/data/' + identifier + '.data.npy').item()\n    samples = data['samples']\n    train_X = samples['train']\n    test_X = samples[test_set]\n    labels = data['labels']\n    train_Y = labels['train']\n    test_Y = labels[test_set]\n    if generate:\n        # now sample from the model\n        synth_Y = np.tile(train_Y, [duplicate_synth, 1])\n        synth_X = model.sample_trained_model(identifier, epoch, num_samples=synth_Y.shape[0], C_samples=synth_Y)\n        # for use in TRTS\n        synth_testX = model.sample_trained_model(identifier, epoch, num_samples=test_Y.shape[0], C_samples=test_Y)\n        synth_data = {'samples': synth_X, 'labels': synth_Y, 'test_samples': synth_testX, 'test_labels': test_Y}\n        np.save('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy', synth_data)\n    else:\n        
print('Loading pre-generated data')\n        print('WARNING: not implemented for TRTS')\n        # get \"train\" data\n        exp_data = np.load('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy').item()\n        synth_X = exp_data['samples']\n        synth_Y = exp_data['labels']\n        n_synth = synth_X.shape[0]\n        synth_X = synth_X.reshape(n_synth, -1)\n    #    pdb.set_trace()\n    #    # ALERT ALERT MODIFYING\n    #    synth_X = 2*(synth_X > 0) - 1\n    orig_data = np.load('/cluster/home/hyland/eICU_task_data.npy').item()\n    if reverse:\n        which_setting = 'trts'\n    # visualise distribution of errors for train and test\n        print('Swapping synthetic test set in for real, to do TRTS!')\n        test_X = synth_testX\n    else:\n        print('Doing normal TSTR')\n        which_setting = 'tstr'\n#    # get test data\n#    test_X = data['test_X']\n#    test_Y = data['test_Y']\n    if not CNN:\n        model_choice = 'RF'\n         # if multivariate, reshape\n        if len(test_X.shape) == 3:\n            test_X = test_X.reshape(test_X.shape[0], -1)\n        if len(train_X.shape) == 3:\n            train_X = train_X.reshape(train_X.shape[0], -1)\n        if len(synth_X.shape) == 3:\n            synth_X = synth_X.reshape(synth_X.shape[0], -1)\n    else:\n        raise ValueError(CNN)\n        model_choice = 'CNN'\n    # we will select the best validation set epoch based on F1 score, take average across all the tasks\n    score_list = []\n    for label in range(synth_Y.shape[1]):\n        task = orig_data['Y_columns'][label]\n        if vali:\n            if not task in ['low_sao2', 'high_heartrate', 'low_respiration']:\n                print('Skipping task', task, 'because validation evaluation.')\n                continue\n        print('Evaluating on task:', task)\n        #print('(', np.mean(synth_Y[:, label]), 'positive in train, ', np.mean(test_Y[:, label]), 'in test)')\n        #m = 
RandomForestClassifier(n_estimators=50).fit(synth_X, synth_Y[:, label])\n        #m = SVC(gamma=0.001).fit(synth_X, synth_Y[:, label])\n        synth_classifier = RandomForestClassifier(n_estimators=100).fit(synth_X, synth_Y[:, label])\n        synth_predY = synth_classifier.predict(test_X)\n        synth_predY_prob = synth_classifier.predict_proba(test_X)[:, 1]\n        real_classifier = RandomForestClassifier(n_estimators=100).fit(train_X, train_Y[:, label])\n        real_predY = real_classifier.predict(test_X)\n        real_predY_prob = real_classifier.predict_proba(test_X)[:, 1]\n        #print('(predicted', np.mean(predict), 'positive labels)')\n        \n        synth_prec, synth_recall, synth_f1, synth_support = precision_recall_fscore_support(test_Y[:, label], synth_predY, average='weighted')\n        synth_accuracy = accuracy_score(test_Y[:, label], synth_predY)\n        synth_auprc = average_precision_score(test_Y[:, label], synth_predY_prob)\n        synth_auroc = roc_auc_score(test_Y[:, label], synth_predY_prob)\n        synth_scores = [synth_prec, synth_recall, synth_f1, synth_accuracy, synth_auprc, synth_auroc]\n\n        real_prec, real_recall, real_f1, real_support = precision_recall_fscore_support(test_Y[:, label], real_predY, average='weighted')\n        real_accuracy = accuracy_score(test_Y[:, label], real_predY)\n        real_auprc = average_precision_score(test_Y[:, label], real_predY_prob)\n        real_auroc = roc_auc_score(test_Y[:, label], real_predY_prob)\n        real_scores = [real_prec, real_recall, real_f1, real_accuracy, real_auprc, real_auroc]\n        \n        all_scores = synth_scores + real_scores\n\n        if vali:\n            report_file = open('./experiments/tstr/vali.' 
+ which_setting + '_report.v3.csv', 'a')\n            report_file.write('eICU_' + task + ',' + identifier + ',' + model_choice + ',' + str(epoch) + ',' + ','.join(map(str, all_scores)) + '\\n')\n            report_file.close()\n        else:\n            report_file = open('./experiments/tstr/' + which_setting + '_report.v3.csv', 'a')\n            report_file.write('eICU_' + task + ',' + identifier + ',' + model_choice + ',' + str(epoch) + ',' + ','.join(map(str, all_scores)) + '\\n')\n            report_file.close()\n        \n        print(classification_report(test_Y[:, label], synth_predY))\n        print(classification_report(test_Y[:, label], real_predY))\n        if task in ['low_sao2', 'high_heartrate', 'low_respiration']:\n            score_list.append(synth_auprc + synth_auroc)\n\n    if do_OR:\n        raise NotImplementedError\n        # do the OR task\n        extreme_heartrate_test = test_Y[:, 1] + test_Y[:, 4]\n        extreme_respiration_test = test_Y[:, 2] + test_Y[:, 5]\n        extreme_systemicmean_test = test_Y[:, 3] + test_Y[:, 6]\n        Y_OR_test = np.vstack([extreme_heartrate_test, extreme_respiration_test, extreme_systemicmean_test]).T\n        Y_OR_test = (Y_OR_test > 0)*1\n\n        extreme_heartrate_synth = synth_Y[:, 1] + synth_Y[:, 4]\n        extreme_respiration_synth = synth_Y[:, 2] + synth_Y[:, 5]\n        extreme_systemicmean_synth = synth_Y[:, 3] + synth_Y[:, 6]\n        Y_OR_synth = np.vstack([extreme_heartrate_synth, extreme_respiration_synth, extreme_systemicmean_synth]).T\n        Y_OR_synth = (Y_OR_synth > 0)*1\n\n        OR_names = ['extreme heartrate', 'extreme respiration', 'extreme MAP']\n        OR_results = []\n        for label in range(Y_OR_synth.shape[1]):\n            print('task:', OR_names[label])\n            print('(', np.mean(Y_OR_synth[:, label]), 'positive in train, ', np.mean(Y_OR_test[:, label]), 'in test)')\n            m = RandomForestClassifier(n_estimators=500).fit(synth_X, Y_OR_synth[:, label])\n      
      predict = m.predict(X_test)\n            print('(predicted', np.mean(predict), 'positive labels)')\n            accuracy = accuracy_score(Y_OR_test[:, label], predict)\n            precision = sklearn.metrics.precision_score(Y_OR_test[:, label], predict)\n            recall = sklearn.metrics.recall_score(Y_OR_test[:, label], predict)\n            print(accuracy, precision, recall)\n            OR_results.append([accuracy, precision, recall])\n    else:\n        OR_results = []\n\n    score_across_tasks = np.mean(np.array(score_list))\n    return score_across_tasks\n\ndef NIPS_toy_plot(identifier_rbf, epoch_rbf, identifier_sine, epoch_sine, identifier_mnist, epoch_mnist):\n    \"\"\"\n    for each experiment:\n    - plot a bunch of train examples\n    - sample a bunch of generated examples\n    - plot all in separate PDFs so i can merge in illustrator\n\n    for sine and rbf, grey background\n    MNIST is just MNIST (square though)\n    \"\"\"\n    n_samples = 15\n    # settings\n    settings_rbf = json.load(open('./experiments/settings/' + identifier_rbf + '.txt', 'r'))\n    settings_sine = json.load(open('./experiments/settings/' + identifier_sine + '.txt', 'r'))\n    settings_mnist = json.load(open('./experiments/settings/' + identifier_mnist + '.txt', 'r'))\n    # data\n    data_rbf = np.load('./experiments/data/' + identifier_rbf + '.data.npy').item()\n    data_sine = np.load('./experiments/data/' + identifier_sine + '.data.npy').item()\n    data_mnist = np.load('./experiments/data/' + identifier_mnist + '.data.npy').item()\n    train_rbf = data_rbf['samples']['train']\n    train_sine = data_sine['samples']['train']\n    train_mnist = data_mnist['samples']['train']\n    # sample\n    samples_rbf = model.sample_trained_model(settings_rbf, epoch_rbf, n_samples)\n    samples_sine = model.sample_trained_model(settings_sine, epoch_sine, n_samples)\n    samples_mnist = model.sample_trained_model(settings_mnist, epoch_mnist, n_samples)\n    # plot them all\n    
index = 0\n    #for sample in np.random.permutation(train_rbf)[:n_samples]:\n    #    plotting.nips_plot_rbf(sample, index, 'train')\n    #    index += 1\n    #for sample in samples_rbf:\n    #    plotting.nips_plot_rbf(sample, index, 'GAN')\n    #    index += 1\n    #for sample in np.random.permutation(train_sine)[:n_samples]:\n    #    plotting.nips_plot_sine(sample, index, 'train')\n    #    index += 1\n    #for sample in samples_sine:\n    #    plotting.nips_plot_sine(sample, index, 'GAN')\n    #    index += 1\n    for sample in np.random.permutation(train_mnist)[:n_samples]:\n        plotting.nips_plot_mnist(sample, index, 'train')\n        index += 1\n    for sample in samples_mnist:\n        plotting.nips_plot_mnist(sample, index, 'GAN')\n        index += 1\n    return True\n"
  },
  {
    "path": "experiments/settings/kdd99.txt",
    "content": "{\r\n\"settings_file\": \"\",\r\n\"data\": \"kdd99\",\r\n\"seq_length\": 30,\r\n\"num_signals\": 6,\r\n\"normalise\": false,\r\n\"scale\": 0.1,\r\n\"freq_low\": 1.0,\r\n\"freq_high\": 5.0,\r\n\"amplitude_low\": 0.1,\r\n\"amplitude_high\": 0.9,\r\n\"multivariate_mnist\": false,\r\n\"full_mnist\": false,\r\n\"data_load_from\": \"\",\r\n\"resample_rate_in_min\": 15,\r\n\"hidden_units_g\": 100,\r\n\"hidden_units_d\": 100,\r\n\"hidden_units_e\": 100,\r\n\"kappa\": 1,\r\n\"latent_dim\": 15,\r\n\"weight\": 0.5,\r\n\"degree\": 1,\r\n\"batch_mean\": false,\r\n\"learn_scale\": false,\r\n\"learning_rate\": 0.1,\r\n\"batch_size\": 500,\r\n\"num_epochs\": 100,\r\n\"D_rounds\": 1,\r\n\"G_rounds\": 3,\r\n\"E_rounds\": 1,\r\n\"shuffle\": true,\r\n\"eval_mul\": false,\r\n\"eval_an\": false,\r\n\"eval_single\": false,\r\n\"wrong_labels\": false,\r\n\"identifier\": \"kdd99\",\r\n\"sub_id\": \"kdd99\",\r\n\"dp\": false,\r\n\"l2norm_bound\": 1e-05,\r\n\"batches_per_lot\": 1,\r\n\"dp_sigma\": 1e-05,\r\n\"use_time\": false,\r\n\"seq_step\": 10,\r\n\"num_generated_features\": 6\r\n}"
  },
  {
    "path": "experiments/settings/kdd99_test.txt",
    "content": "{\r\n\"settings_file\": \"\",\r\n\"data\": \"kdd99_test\",\r\n\"seq_length\": 30,\r\n\"num_signals\": 6,\r\n\"normalise\": false,\r\n\"scale\": 0.1,\r\n\"freq_low\": 1.0,\r\n\"freq_high\": 5.0,\r\n\"amplitude_low\": 0.1,\r\n\"amplitude_high\": 0.9,\r\n\"multivariate_mnist\": false,\r\n\"full_mnist\": false,\r\n\"data_load_from\": \"\",\r\n\"resample_rate_in_min\": 15,\r\n\"hidden_units_g\": 100,\r\n\"hidden_units_d\": 100,\r\n\"hidden_units_e\": 100,\r\n\"kappa\": 1,\r\n\"latent_dim\": 15,\r\n\"weight\": 0.5,\r\n\"degree\": 1,\r\n\"batch_mean\": false,\r\n\"learn_scale\": false,\r\n\"learning_rate\": 0.1,\r\n\"batch_size\": 500,\r\n\"num_epochs\": 100,\r\n\"D_rounds\": 1,\r\n\"G_rounds\": 3,\r\n\"E_rounds\": 1,\r\n\"shuffle\": true,\r\n\"eval_mul\": false,\r\n\"eval_an\": false,\r\n\"eval_single\": false,\r\n\"wrong_labels\": false,\r\n\"identifier\": \"kdd99_test\",\r\n\"sub_id\": \"kdd99\",\r\n\"dp\": false,\r\n\"l2norm_bound\": 1e-05,\r\n\"batches_per_lot\": 1,\r\n\"dp_sigma\": 1e-05,\r\n\"use_time\": false,\r\n\"seq_step\": 10,\r\n\"num_generated_features\": 6\r\n}"
  },
  {
    "path": "mmd.py",
    "content": "'''\nMMD functions implemented in tensorflow.\n(from https://github.com/dougalsutherland/opt-mmd/blob/master/gan/mmd.py)\n'''\nfrom __future__ import division\n\nimport tensorflow as tf\n\nfrom tf_ops import dot, sq_sum\n\nfrom scipy.spatial.distance import pdist\nfrom numpy import median, vstack, einsum\nimport pdb\nimport numpy as np\n\n_eps=1e-8\n\n################################################################################\n### Quadratic-time MMD with Gaussian RBF kernel\n\ndef _mix_rbf_kernel(X, Y, sigmas, wts=None):\n    \"\"\"\n    \"\"\"\n    if wts is None:\n        wts = [1.0] * sigmas.get_shape()[0]\n\n    # debug!\n    if len(X.shape) == 2:\n        # matrix\n        XX = tf.matmul(X, X, transpose_b=True)\n        XY = tf.matmul(X, Y, transpose_b=True)\n        YY = tf.matmul(Y, Y, transpose_b=True)\n    elif len(X.shape) == 3:\n        # tensor -- this is computing the Frobenius norm\n        XX = tf.tensordot(X, X, axes=[[1, 2], [1, 2]])\n        XY = tf.tensordot(X, Y, axes=[[1, 2], [1, 2]])\n        YY = tf.tensordot(Y, Y, axes=[[1, 2], [1, 2]])\n    else:\n        raise ValueError(X)\n\n    X_sqnorms = tf.diag_part(XX)\n    Y_sqnorms = tf.diag_part(YY)\n\n    r = lambda x: tf.expand_dims(x, 0)\n    c = lambda x: tf.expand_dims(x, 1)\n\n    K_XX, K_XY, K_YY = 0, 0, 0\n    for sigma, wt in zip(tf.unstack(sigmas, axis=0), wts):\n        gamma = 1 / (2 * sigma**2)\n        K_XX += wt * tf.exp(-gamma * (-2 * XX + c(X_sqnorms) + r(X_sqnorms)))\n        K_XY += wt * tf.exp(-gamma * (-2 * XY + c(X_sqnorms) + r(Y_sqnorms)))\n        K_YY += wt * tf.exp(-gamma * (-2 * YY + c(Y_sqnorms) + r(Y_sqnorms)))\n\n    return K_XX, K_XY, K_YY, tf.reduce_sum(wts)\n\n\ndef rbf_mmd2(X, Y, sigma=1, biased=True):\n    return mix_rbf_mmd2(X, Y, sigmas=[sigma], biased=biased)\n\n\ndef mix_rbf_mmd2(X, Y, sigmas=(1,), wts=None, biased=True):\n    K_XX, K_XY, K_YY, d = _mix_rbf_kernel(X, Y, sigmas, wts)\n    return _mmd2(K_XX, K_XY, K_YY, const_diagonal=d, 
biased=biased)\n\n\ndef rbf_mmd2_and_ratio(X, Y, sigma=1, biased=True):\n    return mix_rbf_mmd2_and_ratio(X, Y, sigmas=[sigma], biased=biased)\n\n\ndef mix_rbf_mmd2_and_ratio(X, Y, sigmas=(1,), wts=None, biased=True):\n    K_XX, K_XY, K_YY, d = _mix_rbf_kernel(X, Y, sigmas, wts)\n    return _mmd2_and_ratio(K_XX, K_XY, K_YY, const_diagonal=d, biased=biased)\n\n\n################################################################################\n### Helper functions to compute variances based on kernel matrices\n\n\ndef _mmd2(K_XX, K_XY, K_YY, const_diagonal=False, biased=False):\n    m = tf.cast(K_XX.get_shape()[0], tf.float32)\n    n = tf.cast(K_YY.get_shape()[0], tf.float32)\n\n    if biased:\n        mmd2 = (tf.reduce_sum(K_XX) / (m * m)\n              + tf.reduce_sum(K_YY) / (n * n)\n              - 2 * tf.reduce_sum(K_XY) / (m * n))\n    else:\n        if const_diagonal is not False:\n            trace_X = m * const_diagonal\n            trace_Y = n * const_diagonal\n        else:\n            trace_X = tf.trace(K_XX)\n            trace_Y = tf.trace(K_YY)\n\n        mmd2 = ((tf.reduce_sum(K_XX) - trace_X) / (m * (m - 1))\n              + (tf.reduce_sum(K_YY) - trace_Y) / (n * (n - 1))\n              - 2 * tf.reduce_sum(K_XY) / (m * n))\n\n    return mmd2\n\n\ndef _mmd2_and_ratio(K_XX, K_XY, K_YY, const_diagonal=False, biased=False,\n                    min_var_est=_eps):\n    mmd2, var_est = _mmd2_and_variance(\n        K_XX, K_XY, K_YY, const_diagonal=const_diagonal, biased=biased)\n    ratio = mmd2 / tf.sqrt(tf.maximum(var_est, min_var_est))\n    return mmd2, ratio\n\n\ndef _mmd2_and_variance(K_XX, K_XY, K_YY, const_diagonal=False, biased=False):\n    m = tf.cast(K_XX.get_shape()[0], tf.float32)  # Assumes X, Y are same shape\n\n    ### Get the various sums of kernels that we'll use\n    # Kts drop the diagonal, but we don't need to compute them explicitly\n    if const_diagonal is not False:\n        const_diagonal = tf.cast(const_diagonal, tf.float32)\n      
  diag_X = diag_Y = const_diagonal\n        sum_diag_X = sum_diag_Y = m * const_diagonal\n        sum_diag2_X = sum_diag2_Y = m * const_diagonal**2\n    else:\n        diag_X = tf.diag_part(K_XX)\n        diag_Y = tf.diag_part(K_YY)\n\n        sum_diag_X = tf.reduce_sum(diag_X)\n        sum_diag_Y = tf.reduce_sum(diag_Y)\n\n        sum_diag2_X = sq_sum(diag_X)\n        sum_diag2_Y = sq_sum(diag_Y)\n\n    Kt_XX_sums = tf.reduce_sum(K_XX, 1) - diag_X\n    Kt_YY_sums = tf.reduce_sum(K_YY, 1) - diag_Y\n    K_XY_sums_0 = tf.reduce_sum(K_XY, 0)\n    K_XY_sums_1 = tf.reduce_sum(K_XY, 1)\n\n    Kt_XX_sum = tf.reduce_sum(Kt_XX_sums)\n    Kt_YY_sum = tf.reduce_sum(Kt_YY_sums)\n    K_XY_sum = tf.reduce_sum(K_XY_sums_0)\n\n    Kt_XX_2_sum = sq_sum(K_XX) - sum_diag2_X\n    Kt_YY_2_sum = sq_sum(K_YY) - sum_diag2_Y\n    K_XY_2_sum  = sq_sum(K_XY)\n\n    if biased:\n        mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * m)\n              + (Kt_YY_sum + sum_diag_Y) / (m * m)\n              - 2 * K_XY_sum / (m * m))\n    else:\n        mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * (m-1))\n              + (Kt_YY_sum + sum_diag_Y) / (m * (m-1))\n              - 2 * K_XY_sum / (m * m))\n\n    var_est = (\n          2 / (m**2 * (m-1)**2) * (\n              2 * sq_sum(Kt_XX_sums) - Kt_XX_2_sum\n            + 2 * sq_sum(Kt_YY_sums) - Kt_YY_2_sum)\n        - (4*m-6) / (m**3 * (m-1)**3) * (Kt_XX_sum**2 + Kt_YY_sum**2)\n        + 4*(m-2) / (m**3 * (m-1)**2) * (\n              sq_sum(K_XY_sums_1) + sq_sum(K_XY_sums_0))\n        - 4 * (m-3) / (m**3 * (m-1)**2) * K_XY_2_sum\n        - (8*m - 12) / (m**5 * (m-1)) * K_XY_sum**2\n        + 8 / (m**3 * (m-1)) * (\n              1/m * (Kt_XX_sum + Kt_YY_sum) * K_XY_sum\n            - dot(Kt_XX_sums, K_XY_sums_1)\n            - dot(Kt_YY_sums, K_XY_sums_0))\n    )\n\n    return mmd2, var_est\n\n\n### additions from stephanie, for convenience\n\ndef median_pairwise_distance(X, Y=None):\n    \"\"\"\n    Heuristic for bandwidth of the RBF. 
Median pairwise distance of joint data.\n    If Y is missing, just calculate it from X:\n        this is so that, during training, as Y changes, we can use a fixed\n        bandwidth (and save recalculating this each time we evaluated the mmd)\n    At the end of training, we do the heuristic \"correctly\" by including\n    both X and Y.\n\n    Note: most of this code is assuming tensorflow, but X and Y are just ndarrays\n    \"\"\"\n    if Y is None:\n        Y = X       # this is horrendously inefficient, sorry\n   \n    if len(X.shape) == 2:\n        # matrix\n        X_sqnorms = einsum('...i,...i', X, X)\n        Y_sqnorms = einsum('...i,...i', Y, Y)\n        XY = einsum('ia,ja', X, Y)\n    elif len(X.shape) == 3:\n        # tensor -- this is computing the Frobenius norm\n        X_sqnorms = einsum('...ij,...ij', X, X)\n        Y_sqnorms = einsum('...ij,...ij', Y, Y)\n        XY = einsum('iab,jab', X, Y)\n    else:\n        raise ValueError(X)\n\n    distances = np.sqrt(X_sqnorms.reshape(-1, 1) - 2*XY + Y_sqnorms.reshape(1, -1))\n    return median(distances)\n\n\ndef median_pairwise_distance_o(X, Y=None):\n    \"\"\"\n    Heuristic for bandwidth of the RBF. 
Median pairwise distance of joint data.\n    If Y is missing, just calculate it from X:\n        this is so that, during training, as Y changes, we can use a fixed\n        bandwidth (and save recalculating this each time we evaluated the mmd)\n    At the end of training, we do the heuristic \"correctly\" by including\n    both X and Y.\n\n    Note: most of this code is assuming tensorflow, but X and Y are just ndarrays\n    \"\"\"\n    if Y is None:\n        Y = X  # this is horrendously inefficient, sorry\n\n    if len(X.shape) == 2:\n        # matrix\n        X_sqnorms = np.einsum('...i,...i', X, X)\n        Y_sqnorms = np.einsum('...i,...i', Y, Y)\n        XY = np.einsum('ia,ja', X, Y)\n    elif len(X.shape) == 3:\n        # tensor -- this is computing the Frobenius norm\n        X_sqnorms = np.einsum('...ij,...ij', X, X)  # reduce the tensor shape\n        Y_sqnorms = np.einsum('...ij,...ij', Y, Y)\n        XY = np.einsum('iab,jab', X, Y)  # X*Y^T??\n    else:\n        raise ValueError(X)\n\n    distances = np.sqrt(X_sqnorms.reshape(-1, 1) - 2 * XY + Y_sqnorms.reshape(1, -1))\n    distances = distances.reshape(-1, 1)\n    distances = distances[~np.isnan(distances)]\n    return np.median(distances)"
  },
  {
    "path": "mod_core_rnn_cell_impl.py",
    "content": "# Copyright 2015 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\n#modified by Stephanie (@corcra) to enable initializing the bias term in lstm \"\"\"\n# ==============================================================================\n\n\"\"\"Module implementing RNN Cells.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport collections\nimport contextlib\nimport hashlib\nimport math\nimport numbers\n\nfrom tensorflow.python.framework import ops\nfrom tensorflow.python.framework import tensor_shape\nfrom tensorflow.python.framework import tensor_util\nfrom tensorflow.python.ops import array_ops\nfrom tensorflow.python.ops import clip_ops\nfrom tensorflow.python.ops import embedding_ops\nfrom tensorflow.python.ops import init_ops\nfrom tensorflow.python.ops import math_ops\nfrom tensorflow.python.ops import nn_ops\nfrom tensorflow.python.ops import partitioned_variables\nfrom tensorflow.python.ops import random_ops\nfrom tensorflow.python.ops import variable_scope as vs\n\nfrom tensorflow.python.ops.math_ops import sigmoid\nfrom tensorflow.python.ops.math_ops import tanh\n#from tensorflow.python.ops.rnn_cell_impl import _RNNCell as RNNCell\nfrom tensorflow.python.ops.rnn_cell_impl import RNNCell\n\nfrom tensorflow.python.platform import tf_logging as logging\nfrom tensorflow.python.util import nest\n\n\n_BIAS_VARIABLE_NAME = 
\"biases\"\n_WEIGHTS_VARIABLE_NAME = \"weights\"\n\n\n@contextlib.contextmanager\ndef _checked_scope(cell, scope, reuse=None, **kwargs):\n  if reuse is not None:\n    kwargs[\"reuse\"] = reuse\n  with vs.variable_scope(scope, **kwargs) as checking_scope:\n    scope_name = checking_scope.name\n    if hasattr(cell, \"_scope\"):\n      cell_scope = cell._scope  # pylint: disable=protected-access\n      if cell_scope.name != checking_scope.name:\n        raise ValueError(\n            \"Attempt to reuse RNNCell %s with a different variable scope than \"\n            \"its first use.  First use of cell was with scope '%s', this \"\n            \"attempt is with scope '%s'.  Please create a new instance of the \"\n            \"cell if you would like it to use a different set of weights.  \"\n            \"If before you were using: MultiRNNCell([%s(...)] * num_layers), \"\n            \"change to: MultiRNNCell([%s(...) for _ in range(num_layers)]).  \"\n            \"If before you were using the same cell instance as both the \"\n            \"forward and reverse cell of a bidirectional RNN, simply create \"\n            \"two instances (one for forward, one for reverse).  
\"\n            \"In May 2017, we will start transitioning this cell's behavior \"\n            \"to use existing stored weights, if any, when it is called \"\n            \"with scope=None (which can lead to silent model degradation, so \"\n            \"this error will remain until then.)\"\n            % (cell, cell_scope.name, scope_name, type(cell).__name__,\n               type(cell).__name__))\n    else:\n      weights_found = False\n      try:\n        with vs.variable_scope(checking_scope, reuse=True):\n          vs.get_variable(_WEIGHTS_VARIABLE_NAME)\n        weights_found = True\n      except ValueError:\n        pass\n      if weights_found and reuse is None:\n        raise ValueError(\n            \"Attempt to have a second RNNCell use the weights of a variable \"\n            \"scope that already has weights: '%s'; and the cell was not \"\n            \"constructed as %s(..., reuse=True).  \"\n            \"To share the weights of an RNNCell, simply \"\n            \"reuse it in your second calculation, or create a new one with \"\n            \"the argument reuse=True.\" % (scope_name, type(cell).__name__))\n\n    # Everything is OK.  
Update the cell's scope and yield it.\n    cell._scope = checking_scope  # pylint: disable=protected-access\n    yield checking_scope\n\n\nclass BasicRNNCell(RNNCell):\n  \"\"\"The most basic RNN cell.\"\"\"\n\n  def __init__(self, num_units, input_size=None, activation=tanh, reuse=None):\n    if input_size is not None:\n      logging.warn(\"%s: The input_size parameter is deprecated.\", self)\n    self._num_units = num_units\n    self._activation = activation\n    self._reuse = reuse\n\n  @property\n  def state_size(self):\n    return self._num_units\n\n  @property\n  def output_size(self):\n    return self._num_units\n\n  def __call__(self, inputs, state, scope=None):\n    \"\"\"Most basic RNN: output = new_state = act(W * input + U * state + B).\"\"\"\n    with _checked_scope(self, scope or \"basic_rnn_cell\", reuse=self._reuse):\n      output = self._activation(\n          _linear([inputs, state], self._num_units, True))\n    return output, output\n\n\nclass GRUCell(RNNCell):\n  \"\"\"Gated Recurrent Unit cell (cf. 
http://arxiv.org/abs/1406.1078).\"\"\"\n\n  def __init__(self, num_units, input_size=None, activation=tanh, reuse=None):\n    if input_size is not None:\n      logging.warn(\"%s: The input_size parameter is deprecated.\", self)\n    self._num_units = num_units\n    self._activation = activation\n    self._reuse = reuse\n\n  @property\n  def state_size(self):\n    return self._num_units\n\n  @property\n  def output_size(self):\n    return self._num_units\n\n  def __call__(self, inputs, state, scope=None):\n    \"\"\"Gated recurrent unit (GRU) with nunits cells.\"\"\"\n    with _checked_scope(self, scope or \"gru_cell\", reuse=self._reuse):\n      with vs.variable_scope(\"gates\"):  # Reset gate and update gate.\n        # We start with bias of 1.0 to not reset and not update.\n        value = sigmoid(_linear(\n          [inputs, state], 2 * self._num_units, True, 1.0))\n        r, u = array_ops.split(\n            value=value,\n            num_or_size_splits=2,\n            axis=1)\n      with vs.variable_scope(\"candidate\"):\n        c = self._activation(_linear([inputs, r * state],\n                                     self._num_units, True))\n      new_h = u * state + (1 - u) * c\n    return new_h, new_h\n\n\n_LSTMStateTuple = collections.namedtuple(\"LSTMStateTuple\", (\"c\", \"h\"))\n\n\nclass LSTMStateTuple(_LSTMStateTuple):\n  \"\"\"Tuple used by LSTM Cells for `state_size`, `zero_state`, and output state.\n\n  Stores two elements: `(c, h)`, in that order.\n\n  Only used when `state_is_tuple=True`.\n  \"\"\"\n  __slots__ = ()\n\n  @property\n  def dtype(self):\n    (c, h) = self\n    if not c.dtype == h.dtype:\n      raise TypeError(\"Inconsistent internal state: %s vs %s\" %\n                      (str(c.dtype), str(h.dtype)))\n    return c.dtype\n\n\nclass BasicLSTMCell(RNNCell):\n  \"\"\"Basic LSTM recurrent network cell.\n\n  The implementation is based on: http://arxiv.org/abs/1409.2329.\n\n  We add forget_bias (default: 1) to the biases of the forget 
gate in order to\n  reduce the scale of forgetting in the beginning of the training.\n\n  It does not allow cell clipping, a projection layer, and does not\n  use peep-hole connections: it is the basic baseline.\n\n  For advanced models, please use the full LSTMCell that follows.\n  \"\"\"\n\n  def __init__(self, num_units, forget_bias=1.0, input_size=None,\n               state_is_tuple=True, activation=tanh, reuse=None):\n    \"\"\"Initialize the basic LSTM cell.\n\n    Args:\n      num_units: int, The number of units in the LSTM cell.\n      forget_bias: float, The bias added to forget gates (see above).\n      input_size: Deprecated and unused.\n      state_is_tuple: If True, accepted and returned states are 2-tuples of\n        the `c_state` and `m_state`.  If False, they are concatenated\n        along the column axis.  The latter behavior will soon be deprecated.\n      activation: Activation function of the inner states.\n      reuse: (optional) Python boolean describing whether to reuse variables\n        in an existing scope.  If not `True`, and the existing scope already has\n        the given variables, an error is raised.\n    \"\"\"\n    if not state_is_tuple:\n      logging.warn(\"%s: Using a concatenated state is slower and will soon be \"\n                   \"deprecated.  
Use state_is_tuple=True.\", self)\n    if input_size is not None:\n      logging.warn(\"%s: The input_size parameter is deprecated.\", self)\n    self._num_units = num_units\n    self._forget_bias = forget_bias\n    self._state_is_tuple = state_is_tuple\n    self._activation = activation\n    self._reuse = reuse\n\n  @property\n  def state_size(self):\n    return (LSTMStateTuple(self._num_units, self._num_units)\n            if self._state_is_tuple else 2 * self._num_units)\n\n  @property\n  def output_size(self):\n    return self._num_units\n\n  def __call__(self, inputs, state, scope=None):\n    \"\"\"Long short-term memory cell (LSTM).\"\"\"\n    with _checked_scope(self, scope or \"basic_lstm_cell\", reuse=self._reuse):\n      # Parameters of gates are concatenated into one multiply for efficiency.\n      if self._state_is_tuple:\n        c, h = state\n      else:\n        c, h = array_ops.split(value=state, num_or_size_splits=2, axis=1)\n      concat = _linear([inputs, h], 4 * self._num_units, True)\n\n      # i = input_gate, j = new_input, f = forget_gate, o = output_gate\n      i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4, axis=1)\n\n      new_c = (c * sigmoid(f + self._forget_bias) + sigmoid(i) *\n               self._activation(j))\n      new_h = self._activation(new_c) * sigmoid(o)\n\n      if self._state_is_tuple:\n        new_state = LSTMStateTuple(new_c, new_h)\n      else:\n        new_state = array_ops.concat([new_c, new_h], 1)\n      return new_h, new_state\n\n\nclass LSTMCell(RNNCell):\n  \"\"\"Long short-term memory unit (LSTM) recurrent network cell.\n\n  The default non-peephole implementation is based on:\n\n    http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf\n\n  S. Hochreiter and J. Schmidhuber.\n  \"Long Short-Term Memory\". 
Neural Computation, 9(8):1735-1780, 1997.\n\n  The peephole implementation is based on:\n\n    https://research.google.com/pubs/archive/43905.pdf\n\n  Hasim Sak, Andrew Senior, and Francoise Beaufays.\n  \"Long short-term memory recurrent neural network architectures for\n   large scale acoustic modeling.\" INTERSPEECH, 2014.\n\n  The class uses optional peep-hole connections, optional cell clipping, and\n  an optional projection layer.\n  \"\"\"\n\n  def __init__(self, num_units, input_size=None,\n               use_peepholes=False, cell_clip=None,\n               initializer=None, bias_start=0.0, num_proj=None, proj_clip=None,\n               num_unit_shards=None, num_proj_shards=None,\n               forget_bias=1.0, state_is_tuple=True,\n               activation=tanh, reuse=None):\n    \"\"\"Initialize the parameters for an LSTM cell.\n\n    Args:\n      num_units: int, The number of units in the LSTM cell\n      input_size: Deprecated and unused.\n      use_peepholes: bool, set True to enable diagonal/peephole connections.\n      cell_clip: (optional) A float value, if provided the cell state is clipped\n        by this value prior to the cell output activation.\n      initializer: (optional) The initializer to use for the weight and\n        projection matrices.\n      bias_start: (optional) The VALUE to initialize the bias to, in\n        the linear call\n      num_proj: (optional) int, The output dimensionality for the projection\n        matrices.  If None, no projection is performed.\n      proj_clip: (optional) A float value.  If `num_proj > 0` and `proj_clip` is\n        provided, then the projected values are clipped elementwise to within\n        `[-proj_clip, proj_clip]`.\n      num_unit_shards: Deprecated, will be removed by Jan. 2017.\n        Use a variable_scope partitioner instead.\n      num_proj_shards: Deprecated, will be removed by Jan. 
2017.\n        Use a variable_scope partitioner instead.\n      forget_bias: Biases of the forget gate are initialized by default to 1\n        in order to reduce the scale of forgetting at the beginning of\n        the training.\n      state_is_tuple: If True, accepted and returned states are 2-tuples of\n        the `c_state` and `m_state`.  If False, they are concatenated\n        along the column axis.  This latter behavior will soon be deprecated.\n      activation: Activation function of the inner states.\n      reuse: (optional) Python boolean describing whether to reuse variables\n        in an existing scope.  If not `True`, and the existing scope already has\n        the given variables, an error is raised.\n    \"\"\"\n    if not state_is_tuple:\n      logging.warn(\"%s: Using a concatenated state is slower and will soon be \"\n                   \"deprecated.  Use state_is_tuple=True.\", self)\n    if input_size is not None:\n      logging.warn(\"%s: The input_size parameter is deprecated.\", self)\n    if num_unit_shards is not None or num_proj_shards is not None:\n      logging.warn(\n          \"%s: The num_unit_shards and proj_unit_shards parameters are \"\n          \"deprecated and will be removed in Jan 2017.  
\"\n          \"Use a variable scope with a partitioner instead.\", self)\n\n    self._num_units = num_units\n    self._use_peepholes = use_peepholes\n    self._cell_clip = cell_clip\n    self._initializer = initializer\n    self._bias_start = bias_start\n    self._num_proj = num_proj\n    self._proj_clip = proj_clip\n    self._num_unit_shards = num_unit_shards\n    self._num_proj_shards = num_proj_shards\n    self._forget_bias = forget_bias\n    self._state_is_tuple = state_is_tuple\n    self._activation = activation\n    self._reuse = reuse\n\n    if num_proj:\n      self._state_size = (\n          LSTMStateTuple(num_units, num_proj)\n          if state_is_tuple else num_units + num_proj)\n      self._output_size = num_proj\n    else:\n      self._state_size = (\n          LSTMStateTuple(num_units, num_units)\n          if state_is_tuple else 2 * num_units)\n      self._output_size = num_units\n\n  @property\n  def state_size(self):\n    return self._state_size\n\n  @property\n  def output_size(self):\n    return self._output_size\n\n  def __call__(self, inputs, state, scope=None):\n    \"\"\"Run one step of LSTM.\n\n    Args:\n      inputs: input Tensor, 2D, batch x num_units.\n      state: if `state_is_tuple` is False, this must be a state Tensor,\n        `2-D, batch x state_size`.  If `state_is_tuple` is True, this must be a\n        tuple of state Tensors, both `2-D`, with column sizes `c_state` and\n        `m_state`.\n      scope: VariableScope for the created subgraph; defaults to \"lstm_cell\".\n\n    Returns:\n      A tuple containing:\n\n      - A `2-D, [batch x output_dim]`, Tensor representing the output of the\n        LSTM after reading `inputs` when previous state was `state`.\n        Here output_dim is:\n           num_proj if num_proj was set,\n           num_units otherwise.\n      - Tensor(s) representing the new state of LSTM after reading `inputs` when\n        the previous state was `state`.  
Same type and shape(s) as `state`.\n\n    Raises:\n      ValueError: If input size cannot be inferred from inputs via\n        static shape inference.\n    \"\"\"\n    num_proj = self._num_units if self._num_proj is None else self._num_proj\n\n    if self._state_is_tuple:\n      (c_prev, m_prev) = state\n    else:\n      c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])\n      m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj])\n\n    dtype = inputs.dtype\n    input_size = inputs.get_shape().with_rank(2)[1]\n    if input_size.value is None:\n      raise ValueError(\"Could not infer input size from inputs.get_shape()[-1]\")\n    with _checked_scope(self, scope or \"lstm_cell\",\n                        initializer=self._initializer,\n                        reuse=self._reuse) as unit_scope:\n      if self._num_unit_shards is not None:\n        unit_scope.set_partitioner(\n            partitioned_variables.fixed_size_partitioner(\n                self._num_unit_shards))\n      # i = input_gate, j = new_input, f = forget_gate, o = output_gate\n      lstm_matrix = _linear([inputs, m_prev], 4 * self._num_units, bias=True, bias_start=self._bias_start)\n      i, j, f, o = array_ops.split(\n          value=lstm_matrix, num_or_size_splits=4, axis=1)\n      # Diagonal connections\n      if self._use_peepholes:\n        with vs.variable_scope(unit_scope) as projection_scope:\n          if self._num_unit_shards is not None:\n            projection_scope.set_partitioner(None)\n          w_f_diag = vs.get_variable(\n              \"w_f_diag\", shape=[self._num_units], dtype=dtype)\n          w_i_diag = vs.get_variable(\n              \"w_i_diag\", shape=[self._num_units], dtype=dtype)\n          w_o_diag = vs.get_variable(\n              \"w_o_diag\", shape=[self._num_units], dtype=dtype)\n\n      if self._use_peepholes:\n        c = (sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev +\n             sigmoid(i + w_i_diag * c_prev) * 
self._activation(j))\n      else:\n        c = (sigmoid(f + self._forget_bias) * c_prev + sigmoid(i) *\n             self._activation(j))\n\n      if self._cell_clip is not None:\n        # pylint: disable=invalid-unary-operand-type\n        c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)\n        # pylint: enable=invalid-unary-operand-type\n      if self._use_peepholes:\n        m = sigmoid(o + w_o_diag * c) * self._activation(c)\n      else:\n        m = sigmoid(o) * self._activation(c)\n\n      if self._num_proj is not None:\n        with vs.variable_scope(\"projection\") as proj_scope:\n          if self._num_proj_shards is not None:\n            proj_scope.set_partitioner(\n                partitioned_variables.fixed_size_partitioner(\n                    self._num_proj_shards))\n          m = _linear(m, self._num_proj, bias=False)\n\n        if self._proj_clip is not None:\n          # pylint: disable=invalid-unary-operand-type\n          m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip)\n          # pylint: enable=invalid-unary-operand-type\n\n    new_state = (LSTMStateTuple(c, m) if self._state_is_tuple else\n                 array_ops.concat([c, m], 1))\n    return m, new_state\n\n\nclass OutputProjectionWrapper(RNNCell):\n  \"\"\"Operator adding an output projection to the given cell.\n\n  Note: in many cases it may be more efficient to not use this wrapper,\n  but instead concatenate the whole sequence of your outputs in time,\n  do the projection on this batch-concatenated sequence, then split it\n  if needed or directly feed into a softmax.\n  \"\"\"\n\n  def __init__(self, cell, output_size, reuse=None):\n    \"\"\"Create a cell with output projection.\n\n    Args:\n      cell: an RNNCell, a projection to output_size is added to it.\n      output_size: integer, the size of the output after projection.\n      reuse: (optional) Python boolean describing whether to reuse variables\n        in an existing scope.  
If not `True`, and the existing scope already has\n        the given variables, an error is raised.\n\n    Raises:\n      TypeError: if cell is not an RNNCell.\n      ValueError: if output_size is not positive.\n    \"\"\"\n    if not isinstance(cell, RNNCell):\n      raise TypeError(\"The parameter cell is not RNNCell.\")\n    if output_size < 1:\n      raise ValueError(\"Parameter output_size must be > 0: %d.\" % output_size)\n    self._cell = cell\n    self._output_size = output_size\n    self._reuse = reuse\n\n  @property\n  def state_size(self):\n    return self._cell.state_size\n\n  @property\n  def output_size(self):\n    return self._output_size\n\n  def zero_state(self, batch_size, dtype):\n    with ops.name_scope(type(self).__name__ + \"ZeroState\", values=[batch_size]):\n      return self._cell.zero_state(batch_size, dtype)\n\n  def __call__(self, inputs, state, scope=None):\n    \"\"\"Run the cell and output projection on inputs, starting from state.\"\"\"\n    output, res_state = self._cell(inputs, state)\n    # Default scope: \"OutputProjectionWrapper\"\n    with _checked_scope(self, scope or \"output_projection_wrapper\",\n                        reuse=self._reuse):\n      projected = _linear(output, self._output_size, True)\n    return projected, res_state\n\n\nclass InputProjectionWrapper(RNNCell):\n  \"\"\"Operator adding an input projection to the given cell.\n\n  Note: in many cases it may be more efficient to not use this wrapper,\n  but instead concatenate the whole sequence of your inputs in time,\n  do the projection on this batch-concatenated sequence, then split it.\n  \"\"\"\n\n  def __init__(self, cell, num_proj, input_size=None):\n    \"\"\"Create a cell with input projection.\n\n    Args:\n      cell: an RNNCell, a projection of inputs is added before it.\n      num_proj: Python integer.  
The dimension to project to.\n      input_size: Deprecated and unused.\n\n    Raises:\n      TypeError: if cell is not an RNNCell.\n    \"\"\"\n    if input_size is not None:\n      logging.warn(\"%s: The input_size parameter is deprecated.\", self)\n    if not isinstance(cell, RNNCell):\n      raise TypeError(\"The parameter cell is not RNNCell.\")\n    self._cell = cell\n    self._num_proj = num_proj\n\n  @property\n  def state_size(self):\n    return self._cell.state_size\n\n  @property\n  def output_size(self):\n    return self._cell.output_size\n\n  def zero_state(self, batch_size, dtype):\n    with ops.name_scope(type(self).__name__ + \"ZeroState\", values=[batch_size]):\n      return self._cell.zero_state(batch_size, dtype)\n\n  def __call__(self, inputs, state, scope=None):\n    \"\"\"Run the input projection and then the cell.\"\"\"\n    # Default scope: \"InputProjectionWrapper\"\n    with vs.variable_scope(scope or \"input_projection_wrapper\"):\n      projected = _linear(inputs, self._num_proj, True)\n    return self._cell(projected, state)\n\n\ndef _enumerated_map_structure(map_fn, *args, **kwargs):\n  ix = [0]\n  def enumerated_fn(*inner_args, **inner_kwargs):\n    r = map_fn(ix[0], *inner_args, **inner_kwargs)\n    ix[0] += 1\n    return r\n  return nest.map_structure(enumerated_fn, *args, **kwargs)\n\n\nclass DropoutWrapper(RNNCell):\n  \"\"\"Operator adding dropout to inputs and outputs of the given cell.\"\"\"\n\n  def __init__(self, cell, input_keep_prob=1.0, output_keep_prob=1.0,\n               state_keep_prob=1.0, variational_recurrent=False,\n               input_size=None, dtype=None, seed=None):\n    \"\"\"Create a cell with added input, state, and/or output dropout.\n\n    If `variational_recurrent` is set to `True` (**NOT** the default behavior),\n    then the the same dropout mask is applied at every step, as described in:\n\n    Y. Gal, Z Ghahramani.  \"A Theoretically Grounded Application of Dropout in\n    Recurrent Neural Networks\". 
 https://arxiv.org/abs/1512.05287\n\n    Otherwise a different dropout mask is applied at every time step.\n\n    Args:\n      cell: an RNNCell, a projection to output_size is added to it.\n      input_keep_prob: unit Tensor or float between 0 and 1, input keep\n        probability; if it is constant and 1, no input dropout will be added.\n      output_keep_prob: unit Tensor or float between 0 and 1, output keep\n        probability; if it is constant and 1, no output dropout will be added.\n      state_keep_prob: unit Tensor or float between 0 and 1, output keep\n        probability; if it is constant and 1, no output dropout will be added.\n        State dropout is performed on the *output* states of the cell.\n      variational_recurrent: Python bool.  If `True`, then the same\n        dropout pattern is applied across all time steps per run call.\n        If this parameter is set, `input_size` **must** be provided.\n      input_size: (optional) (possibly nested tuple of) `TensorShape` objects\n        containing the depth(s) of the input tensors expected to be passed in to\n        the `DropoutWrapper`.  
Required and used **iff**\n         `variational_recurrent = True` and `input_keep_prob < 1`.\n      dtype: (optional) The `dtype` of the input, state, and output tensors.\n        Required and used **iff** `variational_recurrent = True`.\n      seed: (optional) integer, the randomness seed.\n\n    Raises:\n      TypeError: if cell is not an RNNCell.\n      ValueError: if any of the keep_probs are not between 0 and 1.\n    \"\"\"\n    if not isinstance(cell, RNNCell):\n      raise TypeError(\"The parameter cell is not a RNNCell.\")\n    with ops.name_scope(\"DropoutWrapperInit\"):\n      def tensor_and_const_value(v):\n        tensor_value = ops.convert_to_tensor(v)\n        const_value = tensor_util.constant_value(tensor_value)\n        return (tensor_value, const_value)\n      for prob, attr in [(input_keep_prob, \"input_keep_prob\"),\n                         (state_keep_prob, \"state_keep_prob\"),\n                         (output_keep_prob, \"output_keep_prob\")]:\n        tensor_prob, const_prob = tensor_and_const_value(prob)\n        if const_prob is not None:\n          if const_prob < 0 or const_prob > 1:\n            raise ValueError(\"Parameter %s must be between 0 and 1: %d\"\n                             % (attr, const_prob))\n          setattr(self, \"_%s\" % attr, float(const_prob))\n        else:\n          setattr(self, \"_%s\" % attr, tensor_prob)\n\n    # Set cell, variational_recurrent, seed before running the code below\n    self._cell = cell\n    self._variational_recurrent = variational_recurrent\n    self._seed = seed\n\n    self._recurrent_input_noise = None\n    self._recurrent_state_noise = None\n    self._recurrent_output_noise = None\n\n    if variational_recurrent:\n      if dtype is None:\n        raise ValueError(\n            \"When variational_recurrent=True, dtype must be provided\")\n\n      def convert_to_batch_shape(s):\n        # Prepend a 1 for the batch dimension; for recurrent\n        # variational dropout we use the same 
dropout mask for all\n        # batch elements.\n        return array_ops.concat(\n            ([1], tensor_shape.TensorShape(s).as_list()), 0)\n\n      def batch_noise(s, inner_seed):\n        shape = convert_to_batch_shape(s)\n        return random_ops.random_uniform(shape, seed=inner_seed, dtype=dtype)\n\n      if (not isinstance(self._input_keep_prob, numbers.Real) or\n          self._input_keep_prob < 1.0):\n        if input_size is None:\n          raise ValueError(\n              \"When variational_recurrent=True and input_keep_prob < 1.0 or \"\n              \"is unknown, input_size must be provided\")\n        self._recurrent_input_noise = _enumerated_map_structure(\n            lambda i, s: batch_noise(s, inner_seed=self._gen_seed(\"input\", i)),\n            input_size)\n      self._recurrent_state_noise = _enumerated_map_structure(\n          lambda i, s: batch_noise(s, inner_seed=self._gen_seed(\"state\", i)),\n          cell.state_size)\n      self._recurrent_output_noise = _enumerated_map_structure(\n          lambda i, s: batch_noise(s, inner_seed=self._gen_seed(\"output\", i)),\n          cell.output_size)\n\n  def _gen_seed(self, salt_prefix, index):\n    if self._seed is None:\n      return None\n    salt = \"%s_%d\" % (salt_prefix, index)\n    string = (str(self._seed) + salt).encode(\"utf-8\")\n    return int(hashlib.md5(string).hexdigest()[:8], 16) & 0x7FFFFFFF\n\n  @property\n  def state_size(self):\n    return self._cell.state_size\n\n  @property\n  def output_size(self):\n    return self._cell.output_size\n\n  def zero_state(self, batch_size, dtype):\n    with ops.name_scope(type(self).__name__ + \"ZeroState\", values=[batch_size]):\n      return self._cell.zero_state(batch_size, dtype)\n\n  def _variational_recurrent_dropout_value(\n      self, index, value, noise, keep_prob):\n    \"\"\"Performs dropout given the pre-calculated noise tensor.\"\"\"\n    # uniform [keep_prob, 1.0 + keep_prob)\n    random_tensor = keep_prob + noise\n\n    # 
0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob)\n    binary_tensor = math_ops.floor(random_tensor)\n    ret = math_ops.div(value, keep_prob) * binary_tensor\n    ret.set_shape(value.get_shape())\n    return ret\n\n  def _dropout(self, values, salt_prefix, recurrent_noise, keep_prob):\n    \"\"\"Decides whether to perform standard dropout or recurrent dropout.\"\"\"\n    if not self._variational_recurrent:\n      def dropout(i, v):\n        return nn_ops.dropout(\n            v, keep_prob=keep_prob, seed=self._gen_seed(salt_prefix, i))\n      return _enumerated_map_structure(dropout, values)\n    else:\n      def dropout(i, v, n):\n        return self._variational_recurrent_dropout_value(i, v, n, keep_prob)\n      return _enumerated_map_structure(dropout, values, recurrent_noise)\n\n  def __call__(self, inputs, state, scope=None):\n    \"\"\"Run the cell with the declared dropouts.\"\"\"\n    def _should_dropout(p):\n      return (not isinstance(p, float)) or p < 1\n\n    if _should_dropout(self._input_keep_prob):\n      inputs = self._dropout(inputs, \"input\",\n                             self._recurrent_input_noise,\n                             self._input_keep_prob)\n    output, new_state = self._cell(inputs, state, scope)\n    if _should_dropout(self._state_keep_prob):\n      new_state = self._dropout(new_state, \"state\",\n                                self._recurrent_state_noise,\n                                self._state_keep_prob)\n    if _should_dropout(self._output_keep_prob):\n      output = self._dropout(output, \"output\",\n                             self._recurrent_output_noise,\n                             self._output_keep_prob)\n    return output, new_state\n\n\nclass ResidualWrapper(RNNCell):\n  \"\"\"RNNCell wrapper that ensures cell inputs are added to the outputs.\"\"\"\n\n  def __init__(self, cell):\n    \"\"\"Constructs a `ResidualWrapper` for `cell`.\n\n    Args:\n      cell: An instance of `RNNCell`.\n    \"\"\"\n    
self._cell = cell\n\n  @property\n  def state_size(self):\n    return self._cell.state_size\n\n  @property\n  def output_size(self):\n    return self._cell.output_size\n\n  def zero_state(self, batch_size, dtype):\n    with ops.name_scope(type(self).__name__ + \"ZeroState\", values=[batch_size]):\n      return self._cell.zero_state(batch_size, dtype)\n\n  def __call__(self, inputs, state, scope=None):\n    \"\"\"Run the cell and add its inputs to its outputs.\n\n    Args:\n      inputs: cell inputs.\n      state: cell state.\n      scope: optional cell scope.\n\n    Returns:\n      Tuple of cell outputs and new state.\n\n    Raises:\n      TypeError: If cell inputs and outputs have different structure (type).\n      ValueError: If cell inputs and outputs have different structure (value).\n    \"\"\"\n    outputs, new_state = self._cell(inputs, state, scope=scope)\n    nest.assert_same_structure(inputs, outputs)\n    # Ensure shapes match\n    def assert_shape_match(inp, out):\n      inp.get_shape().assert_is_compatible_with(out.get_shape())\n    nest.map_structure(assert_shape_match, inputs, outputs)\n    res_outputs = nest.map_structure(\n        lambda inp, out: inp + out, inputs, outputs)\n    return (res_outputs, new_state)\n\n\nclass DeviceWrapper(RNNCell):\n  \"\"\"Operator that ensures an RNNCell runs on a particular device.\"\"\"\n\n  def __init__(self, cell, device):\n    \"\"\"Construct a `DeviceWrapper` for `cell` with device `device`.\n\n    Ensures the wrapped `cell` is called with `tf.device(device)`.\n\n    Args:\n      cell: An instance of `RNNCell`.\n      device: A device string or function, for passing to `tf.device`.\n    \"\"\"\n    self._cell = cell\n    self._device = device\n\n  @property\n  def state_size(self):\n    return self._cell.state_size\n\n  @property\n  def output_size(self):\n    return self._cell.output_size\n\n  def zero_state(self, batch_size, dtype):\n    with ops.name_scope(type(self).__name__ + \"ZeroState\", 
values=[batch_size]):\n      return self._cell.zero_state(batch_size, dtype)\n\n  def __call__(self, inputs, state, scope=None):\n    \"\"\"Run the cell on specified device.\"\"\"\n    with ops.device(self._device):\n      return self._cell(inputs, state, scope=scope)\n\n\nclass EmbeddingWrapper(RNNCell):\n  \"\"\"Operator adding input embedding to the given cell.\n\n  Note: in many cases it may be more efficient to not use this wrapper,\n  but instead concatenate the whole sequence of your inputs in time,\n  do the embedding on this batch-concatenated sequence, then split it and\n  feed into your RNN.\n  \"\"\"\n\n  def __init__(self, cell, embedding_classes, embedding_size, initializer=None,\n               reuse=None):\n    \"\"\"Create a cell with an added input embedding.\n\n    Args:\n      cell: an RNNCell, an embedding will be put before its inputs.\n      embedding_classes: integer, how many symbols will be embedded.\n      embedding_size: integer, the size of the vectors we embed into.\n      initializer: an initializer to use when creating the embedding;\n        if None, the initializer from variable scope or a default one is used.\n      reuse: (optional) Python boolean describing whether to reuse variables\n        in an existing scope.  
If not `True`, and the existing scope already has\n        the given variables, an error is raised.\n\n    Raises:\n      TypeError: if cell is not an RNNCell.\n      ValueError: if embedding_classes is not positive.\n    \"\"\"\n    if not isinstance(cell, RNNCell):\n      raise TypeError(\"The parameter cell is not RNNCell.\")\n    if embedding_classes <= 0 or embedding_size <= 0:\n      raise ValueError(\"Both embedding_classes and embedding_size must be > 0: \"\n                       \"%d, %d.\" % (embedding_classes, embedding_size))\n    self._cell = cell\n    self._embedding_classes = embedding_classes\n    self._embedding_size = embedding_size\n    self._initializer = initializer\n    self._reuse = reuse\n\n  @property\n  def state_size(self):\n    return self._cell.state_size\n\n  @property\n  def output_size(self):\n    return self._cell.output_size\n\n  def zero_state(self, batch_size, dtype):\n    with ops.name_scope(type(self).__name__ + \"ZeroState\", values=[batch_size]):\n      return self._cell.zero_state(batch_size, dtype)\n\n  def __call__(self, inputs, state, scope=None):\n    \"\"\"Run the cell on embedded inputs.\"\"\"\n    with _checked_scope(self, scope or \"embedding_wrapper\", reuse=self._reuse):\n      with ops.device(\"/cpu:0\"):\n        if self._initializer:\n          initializer = self._initializer\n        elif vs.get_variable_scope().initializer:\n          initializer = vs.get_variable_scope().initializer\n        else:\n          # Default initializer for embeddings should have variance=1.\n          sqrt3 = math.sqrt(3)  # Uniform(-sqrt(3), sqrt(3)) has variance=1.\n          initializer = init_ops.random_uniform_initializer(-sqrt3, sqrt3)\n\n        if type(state) is tuple:\n          data_type = state[0].dtype\n        else:\n          data_type = state.dtype\n\n        embedding = vs.get_variable(\n            \"embedding\", [self._embedding_classes, self._embedding_size],\n            initializer=initializer,\n            
dtype=data_type)\n        embedded = embedding_ops.embedding_lookup(\n            embedding, array_ops.reshape(inputs, [-1]))\n    return self._cell(embedded, state)\n\n\nclass MultiRNNCell(RNNCell):\n  \"\"\"RNN cell composed sequentially of multiple simple cells.\"\"\"\n\n  def __init__(self, cells, state_is_tuple=True):\n    \"\"\"Create a RNN cell composed sequentially of a number of RNNCells.\n\n    Args:\n      cells: list of RNNCells that will be composed in this order.\n      state_is_tuple: If True, accepted and returned states are n-tuples, where\n        `n = len(cells)`.  If False, the states are all\n        concatenated along the column axis.  This latter behavior will soon be\n        deprecated.\n\n    Raises:\n      ValueError: if cells is empty (not allowed), or at least one of the cells\n        returns a state tuple but the flag `state_is_tuple` is `False`.\n    \"\"\"\n    if not cells:\n      raise ValueError(\"Must specify at least one cell for MultiRNNCell.\")\n    if not nest.is_sequence(cells):\n      raise TypeError(\n          \"cells must be a list or tuple, but saw: %s.\" % cells)\n\n    self._cells = cells\n    self._state_is_tuple = state_is_tuple\n    if not state_is_tuple:\n      if any(nest.is_sequence(c.state_size) for c in self._cells):\n        raise ValueError(\"Some cells return tuples of states, but the flag \"\n                         \"state_is_tuple is not set.  
State sizes are: %s\"\n                         % str([c.state_size for c in self._cells]))\n\n  @property\n  def state_size(self):\n    if self._state_is_tuple:\n      return tuple(cell.state_size for cell in self._cells)\n    else:\n      return sum([cell.state_size for cell in self._cells])\n\n  @property\n  def output_size(self):\n    return self._cells[-1].output_size\n\n  def zero_state(self, batch_size, dtype):\n    with ops.name_scope(type(self).__name__ + \"ZeroState\", values=[batch_size]):\n      if self._state_is_tuple:\n        return tuple(cell.zero_state(batch_size, dtype) for cell in self._cells)\n      else:\n        # We know here that state_size of each cell is not a tuple and\n        # presumably does not contain TensorArrays or anything else fancy\n        return super(MultiRNNCell, self).zero_state(batch_size, dtype)\n\n  def __call__(self, inputs, state, scope=None):\n    \"\"\"Run this multi-layer cell on inputs, starting from state.\"\"\"\n    with vs.variable_scope(scope or \"multi_rnn_cell\"):\n      cur_state_pos = 0\n      cur_inp = inputs\n      new_states = []\n      for i, cell in enumerate(self._cells):\n        with vs.variable_scope(\"cell_%d\" % i):\n          if self._state_is_tuple:\n            if not nest.is_sequence(state):\n              raise ValueError(\n                  \"Expected state to be a tuple of length %d, but received: %s\"\n                  % (len(self.state_size), state))\n            cur_state = state[i]\n          else:\n            cur_state = array_ops.slice(\n                state, [0, cur_state_pos], [-1, cell.state_size])\n            cur_state_pos += cell.state_size\n          cur_inp, new_state = cell(cur_inp, cur_state)\n          new_states.append(new_state)\n    new_states = (tuple(new_states) if self._state_is_tuple else\n                  array_ops.concat(new_states, 1))\n    return cur_inp, new_states\n\n\nclass _SlimRNNCell(RNNCell):\n  \"\"\"A simple wrapper for slim.rnn_cells.\"\"\"\n\n  
def __init__(self, cell_fn):\n    \"\"\"Create a SlimRNNCell from a cell_fn.\n\n    Args:\n      cell_fn: a function which takes (inputs, state, scope) and produces the\n        outputs and the new_state. Additionally when called with inputs=None and\n        state=None it should return (initial_outputs, initial_state).\n\n    Raises:\n      TypeError: if cell_fn is not callable\n      ValueError: if cell_fn cannot produce a valid initial state.\n    \"\"\"\n    if not callable(cell_fn):\n      raise TypeError(\"cell_fn %s needs to be callable\", cell_fn)\n    self._cell_fn = cell_fn\n    self._cell_name = cell_fn.func.__name__\n    init_output, init_state = self._cell_fn(None, None)\n    output_shape = init_output.get_shape()\n    state_shape = init_state.get_shape()\n    self._output_size = output_shape.with_rank(2)[1].value\n    self._state_size = state_shape.with_rank(2)[1].value\n    if self._output_size is None:\n      raise ValueError(\"Initial output created by %s has invalid shape %s\" %\n                       (self._cell_name, output_shape))\n    if self._state_size is None:\n      raise ValueError(\"Initial state created by %s has invalid shape %s\" %\n                       (self._cell_name, state_shape))\n\n  @property\n  def state_size(self):\n    return self._state_size\n\n  @property\n  def output_size(self):\n    return self._output_size\n\n  def __call__(self, inputs, state, scope=None):\n    scope = scope or self._cell_name\n    output, state = self._cell_fn(inputs, state, scope=scope)\n    return output, state\n\n\ndef _linear(args, output_size, bias, bias_start=0.0, scope=None):\n  \"\"\"Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.\n\n  Args:\n    args: a 2D Tensor or a list of 2D, batch x n, Tensors.\n    output_size: int, second dimension of W[i].\n    bias: boolean, whether to add a bias term or not.\n    bias_start: starting value to initialize the bias; 0 by default.\n\n  Returns:\n    A 2D Tensor with shape [batch x 
output_size] equal to\n    sum_i(args[i] * W[i]), where W[i]s are newly created matrices.\n\n  Raises:\n    ValueError: if some of the arguments has unspecified or wrong shape.\n  \"\"\"\n  if args is None or (nest.is_sequence(args) and not args):\n    raise ValueError(\"`args` must be specified\")\n  if not nest.is_sequence(args):\n    args = [args]\n\n  # Calculate the total size of arguments on dimension 1.\n  total_arg_size = 0\n  shapes = [a.get_shape() for a in args]\n  for shape in shapes:\n    if shape.ndims != 2:\n      raise ValueError(\"linear is expecting 2D arguments: %s\" % shapes)\n    if shape[1].value is None:\n      raise ValueError(\"linear expects shape[1] to be provided for shape %s, \"\n                       \"but saw %s\" % (shape, shape[1]))\n    else:\n      total_arg_size += shape[1].value\n\n  dtype = [a.dtype for a in args][0]\n\n  # Now the computation.\n  scope = vs.get_variable_scope()\n  with vs.variable_scope(scope) as outer_scope:\n    weights = vs.get_variable(\n        _WEIGHTS_VARIABLE_NAME, [total_arg_size, output_size], dtype=dtype)\n    if len(args) == 1:\n      res = math_ops.matmul(args[0], weights)\n    else:\n      res = math_ops.matmul(array_ops.concat(args, 1), weights)\n    if not bias:\n      return res\n    with vs.variable_scope(outer_scope) as inner_scope:\n      inner_scope.set_partitioner(None)\n      biases = vs.get_variable(\n          _BIAS_VARIABLE_NAME, [output_size],\n          dtype=dtype,\n          initializer=init_ops.constant_initializer(bias_start, dtype=dtype))\n    return nn_ops.bias_add(res, biases)\n"
  },
  {
    "path": "model.py",
    "content": "import tensorflow as tf\nimport numpy as np\n# from data_utils import get_batch\nimport data_utils\nimport pdb\nimport json\nimport sys\nfrom mod_core_rnn_cell_impl import LSTMCell  # modified to allow initializing bias in lstm\n\n# from tensorflow.contrib.rnn import LSTMCell\ntf.logging.set_verbosity(tf.logging.ERROR)\nimport mmd\n\nfrom differential_privacy.dp_sgd.dp_optimizer import dp_optimizer\nfrom differential_privacy.dp_sgd.dp_optimizer import sanitizer\nfrom differential_privacy.privacy_accountant.tf import accountant\n\n# ------------------------------- #\n\"\"\"\nMost of the models are copied from https://github.com/ratschlab/RGAN\n\"\"\"\n\n# --- to do with latent space --- #\n\ndef sample_Z(batch_size, seq_length, latent_dim, use_time=False, use_noisy_time=False):\n    sample = np.float32(np.random.normal(size=[batch_size, seq_length, latent_dim]))\n    if use_time:\n        print('WARNING: use_time has different semantics')\n        sample[:, :, 0] = np.linspace(0, 1.0 / seq_length, num=seq_length)\n    return sample\n\n# --- samples for testing ---#\n\ndef sample_T(batch_size, batch_idx):\n    samples_aaa = np.load('./data/samples_aa.npy')\n    num_samples_t = samples_aaa.shape[0]\n    labels_aaa = np.load('./data/labels_aa.npy')\n    idx_aaa = np.load('./data/idx_aa.npy')\n    start_pos = batch_idx * batch_size\n    end_pos = start_pos + batch_size\n    T_mb = samples_aaa[start_pos:end_pos, :, :]\n    L_mb = labels_aaa[start_pos:end_pos, :, :]\n    I_mb = idx_aaa[start_pos:end_pos, :, :]\n    return T_mb, L_mb, I_mb, num_samples_t\n\ndef sample_TT(batch_size):\n    samples_aaa = np.load('./data/samples_aa.npy')\n    labels_aaa = np.load('./data/labels_aa.npy')\n    idx_aaa = np.load('./data/idx_aa.npy')\n    T_indices = np.random.choice(len(samples_aaa), size=batch_size, replace=False)\n    T_mb = samples_aaa[T_indices, :, :]\n    L_mb = labels_aaa[T_indices, :, :]\n    I_mb = idx_aaa[T_indices, :, :]\n    return T_mb, L_mb, 
I_mb\n\n# --- to do with training --- #\ndef train_epoch(epoch, samples, labels, sess, Z, X, D_loss, G_loss, D_solver, G_solver,\n                batch_size, use_time, D_rounds, G_rounds, seq_length,\n                latent_dim, num_signals):\n    \"\"\"\n    Train generator and discriminator for one epoch.\n    \"\"\"\n    # for batch_idx in range(0, int(len(samples) / batch_size) - (D_rounds + (cond_dim > 0) * G_rounds), D_rounds + (cond_dim > 0) * G_rounds):\n    for batch_idx in range(0, int(len(samples) / batch_size) - (D_rounds + G_rounds), D_rounds + G_rounds):\n        # update the discriminator\n        X_mb, Y_mb = data_utils.get_batch(samples, batch_size, batch_idx, labels)\n        Z_mb = sample_Z(batch_size, seq_length, latent_dim, use_time)\n        for d in range(D_rounds):\n            # run the discriminator solver\n            _ = sess.run(D_solver, feed_dict={X: X_mb, Z: Z_mb})\n\n        # update the generator\n        for g in range(G_rounds):\n            # run the generator solver\n            _ = sess.run(G_solver, feed_dict={Z: sample_Z(batch_size, seq_length, latent_dim, use_time=use_time)})\n\n    # at the end, get the loss\n    D_loss_curr, G_loss_curr = sess.run([D_loss, G_loss], feed_dict={X: X_mb,\n                                                                     Z: sample_Z(batch_size, seq_length, latent_dim,\n                                                                                 use_time=use_time)})\n    D_loss_curr = np.mean(D_loss_curr)\n    G_loss_curr = np.mean(G_loss_curr)\n\n\n    return D_loss_curr, G_loss_curr\n\n\ndef GAN_loss(Z, X, generator_settings, discriminator_settings):\n\n    # normal GAN\n    G_sample = generator(Z, **generator_settings)\n\n    D_real, D_logit_real = discriminator(X, **discriminator_settings)\n\n    D_fake, D_logit_fake = discriminator(G_sample, reuse=True, **discriminator_settings)\n\n    # Measures the probability error in discrete classification tasks in which each class is 
independent\n    # and not mutually exclusive.\n    # logits: predicted labels??\n\n    D_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_real, labels=tf.ones_like(D_logit_real)), 1)\n    D_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_fake, labels=tf.zeros_like(D_logit_fake)), 1)\n\n    D_loss = D_loss_real + D_loss_fake\n\n\n    # G_loss = tf.reduce_mean(tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_fake, labels=tf.ones_like(D_logit_fake)), axis=1))\n    G_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_fake, labels=tf.ones_like(D_logit_fake)), 1)\n\n\n    return D_loss, G_loss\n\n\ndef GAN_solvers(D_loss, G_loss, learning_rate, batch_size, total_examples, l2norm_bound, batches_per_lot, sigma, dp=False):\n    \"\"\"\n    Optimizers\n    \"\"\"\n    discriminator_vars = [v for v in tf.trainable_variables() if v.name.startswith('discriminator')]\n    generator_vars = [v for v in tf.trainable_variables() if v.name.startswith('generator')]\n    if dp:\n        print('Using differentially private SGD to train discriminator!')\n        eps = tf.placeholder(tf.float32)\n        delta = tf.placeholder(tf.float32)\n        priv_accountant = accountant.GaussianMomentsAccountant(total_examples)\n        clip = True\n        l2norm_bound = l2norm_bound / batch_size\n        batches_per_lot = 1\n        gaussian_sanitizer = sanitizer.AmortizedGaussianSanitizer(\n            priv_accountant,\n            [l2norm_bound, clip])\n\n        # the trick is that we need to calculate the gradient with respect to\n        # each example in the batch, during the DP SGD step\n        D_solver = dp_optimizer.DPGradientDescentOptimizer(learning_rate,\n                                                           [eps, delta],\n                                                           sanitizer=gaussian_sanitizer,\n                                                         
  sigma=sigma,\n                                                           batches_per_lot=batches_per_lot).minimize(D_loss, var_list=discriminator_vars)\n    else:\n        D_loss_mean_over_batch = tf.reduce_mean(D_loss)\n        D_solver = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(D_loss_mean_over_batch, var_list=discriminator_vars)\n        priv_accountant = None\n    G_loss_mean_over_batch = tf.reduce_mean(G_loss)\n    G_solver = tf.train.AdamOptimizer().minimize(G_loss_mean_over_batch, var_list=generator_vars)\n    return D_solver, G_solver, priv_accountant\n\n\n# --- to do with the model --- #\n\ndef create_placeholders(batch_size, seq_length, latent_dim, num_signals):\n\n    Z = tf.placeholder(tf.float32, [batch_size, seq_length, latent_dim])\n    X = tf.placeholder(tf.float32, [batch_size, seq_length, num_signals])\n    T = tf.placeholder(tf.float32, [batch_size, seq_length, num_signals])\n    return Z, X, T\n\ndef generator(z, hidden_units_g, seq_length, batch_size, num_signals, reuse=False, parameters=None, learn_scale=True):\n\n    \"\"\"\n    If parameters are supplied, initialise as such\n    \"\"\"\n    with tf.variable_scope(\"generator\") as scope:\n        if reuse:\n            scope.reuse_variables()\n        if parameters is None:\n            W_out_G_initializer = tf.truncated_normal_initializer()\n            b_out_G_initializer = tf.truncated_normal_initializer()\n            scale_out_G_initializer = tf.constant_initializer(value=1.0)\n            lstm_initializer = None\n            bias_start = 1.0\n        else:\n            W_out_G_initializer = tf.constant_initializer(value=parameters['generator/W_out_G:0'])\n            b_out_G_initializer = tf.constant_initializer(value=parameters['generator/b_out_G:0'])\n            try:\n                scale_out_G_initializer = tf.constant_initializer(value=parameters['generator/scale_out_G:0'])\n            except KeyError:\n                scale_out_G_initializer = 
tf.constant_initializer(value=1)\n                assert learn_scale\n            lstm_initializer = tf.constant_initializer(value=parameters['generator/rnn/lstm_cell/weights:0'])\n            bias_start = parameters['generator/rnn/lstm_cell/biases:0']\n\n        W_out_G = tf.get_variable(name='W_out_G', shape=[hidden_units_g, num_signals],\n                                  initializer=W_out_G_initializer)\n        b_out_G = tf.get_variable(name='b_out_G', shape=num_signals, initializer=b_out_G_initializer)\n        scale_out_G = tf.get_variable(name='scale_out_G', shape=1, initializer=scale_out_G_initializer,\n                                      trainable=learn_scale)\n        # inputs\n        inputs = z\n\n        cell = LSTMCell(num_units=hidden_units_g,\n                        state_is_tuple=True,\n                        initializer=lstm_initializer,\n                        bias_start=bias_start,\n                        reuse=reuse)\n        rnn_outputs, rnn_states = tf.nn.dynamic_rnn(\n            cell=cell,\n            dtype=tf.float32,\n            sequence_length=[seq_length] * batch_size,\n            inputs=inputs)\n        rnn_outputs_2d = tf.reshape(rnn_outputs, [-1, hidden_units_g])\n        logits_2d = tf.matmul(rnn_outputs_2d, W_out_G) + b_out_G #out put weighted sum\n        #        output_2d = tf.multiply(tf.nn.tanh(logits_2d), scale_out_G)\n        output_2d = tf.nn.tanh(logits_2d) # logits operation [-1, 1]\n        output_3d = tf.reshape(output_2d, [-1, seq_length, num_signals])\n\n    return output_3d\n\n\ndef discriminator(x, hidden_units_d, seq_length, batch_size, reuse=False, parameters=None, batch_mean=False):\n    with tf.variable_scope(\"discriminator\") as scope:\n        if reuse:\n            scope.reuse_variables()\n        if parameters is None:\n            W_out_D = tf.get_variable(name='W_out_D', shape=[hidden_units_d, 1],\n                                      initializer=tf.truncated_normal_initializer())\n            
b_out_D = tf.get_variable(name='b_out_D', shape=1,\n                                      initializer=tf.truncated_normal_initializer())\n\n        else:\n            W_out_D = tf.constant_initializer(value=parameters['discriminator/W_out_D:0'])\n            b_out_D = tf.constant_initializer(value=parameters['discriminator/b_out_D:0'])\n\n        # inputs\n        inputs = x\n\n        # add the average of the inputs to the inputs (mode collapse?\n        if batch_mean:\n            mean_over_batch = tf.stack([tf.reduce_mean(x, axis=0)] * batch_size, axis=0)\n            inputs = tf.concat([x, mean_over_batch], axis=2)\n\n        cell = tf.contrib.rnn.LSTMCell(num_units=hidden_units_d,\n                                       state_is_tuple=True,\n                                       reuse=reuse)\n        rnn_outputs, rnn_states = tf.nn.dynamic_rnn(\n            cell=cell,\n            dtype=tf.float32,\n            inputs=inputs)\n        # logit_final = tf.matmul(rnn_outputs[:, -1], W_final_D) + b_final_D\n        logits = tf.einsum('ijk,km', rnn_outputs, W_out_D) + b_out_D # output weighted sum\n        # real logits or actual output layer?\n        # logit is a function that maps probabilities ([0,1]) to ([-inf,inf]) ?\n\n        output = tf.nn.sigmoid(logits) # y = 1 / (1 + exp(-x)). output activation [0, 1]. 
Probability??\n        # sigmoid output ([0,1]), Probability?\n\n    return output, logits\n\n\n# --- display ----#\ndef display_batch_progression(j, id_max):\n    '''\n    See epoch progression\n    '''\n    batch_progression = int((j / id_max) * 100)\n    sys.stdout.write(str(batch_progression) + ' % epoch' + chr(13))\n    _ = sys.stdout.flush\n\n\n# --- to do with saving/loading --- #\n\ndef dump_parameters(identifier, sess):\n    \"\"\"\n    Save model parameters to a numpy file\n    \"\"\"\n    # dump_path = './experiments/parameters/' + identifier + '.npy'\n    dump_path = './experiments/parameters/' + identifier + '.npy'\n    model_parameters = dict()\n    for v in tf.trainable_variables():\n        model_parameters[v.name] = sess.run(v)\n    np.save(dump_path, model_parameters)\n    print('Recorded', len(model_parameters), 'parameters to', dump_path)\n    return True\n\n\ndef load_parameters(identifier):\n    \"\"\"\n    Load parameters from a numpy file\n    \"\"\"\n    # load_path = './experiments/plots/parameters/' + identifier + '.npy'\n    # load_path = './experiments/plots/parameters/parameters_60/' + identifier + '.npy'\n    # load_path = './experiments/parameters/' + identifier + '.npy'\n    # load_path = './experiments/parameters/' + identifier + '.npy'\n    model_parameters = np.load(identifier).item()\n    return model_parameters\n\n\n\n\n\n"
  },
  {
    "path": "plotting.py",
    "content": "import numpy as np\nimport matplotlib as mpl\nmpl.use('Agg')\nimport matplotlib.pyplot as plt\nimport pdb\nfrom time import time\nfrom matplotlib.colors import hsv_to_rgb\nfrom pandas import read_table, read_hdf\n\ndef plot_label(label, id):\n\n    fig, ax = plt.subplots(1, 1)\n    ax.plot(label)\n    fig.savefig(\"./Figs/Label_\" + str(id).zfill(2) + \".png\")\n    plt.clf()\n    plt.close(fig)\n\n    return\n\ndef visualise_at_epoch(vis_sample, data, predict_labels, epoch,\n        identifier, num_epochs, resample_rate_in_min, multivariate_mnist,\n        seq_length, labels):\n    # TODO: what's with all these arguments\n    if data == 'mnist':\n        if predict_labels:\n            n_labels = 1\n            if one_hot:\n                n_labels = 6\n                lab_votes = np.argmax(vis_sample[:, :, -n_labels:], axis=2)\n            else:\n                lab_votes = vis_sample[:, :, -n_labels:]\n            labs, _ = mode(lab_votes, axis=1)\n            samps = vis_sample[:, :, :-n_labels]\n        else:\n            labs = labels\n            samps = vis_sample\n        if multivariate_mnist:\n            save_mnist_plot_sample(samps.reshape(-1, seq_length**2, 1), epoch, identifier, n_samples=6, labels=labs)\n        else:\n            save_mnist_plot_sample(samps, epoch, identifier, n_samples=6, labels=labs)\n    else:\n        save_plot_sample(vis_sample, epoch, identifier, n_samples=6,\n                num_epochs=num_epochs)\n\n    return True\n\n\ndef save_plot_sample(samples, idx, identifier, n_samples=16, num_epochs=None, ncol=4):\n    assert n_samples <= samples.shape[0]\n    assert n_samples % ncol == 0\n    sample_length = samples.shape[1]\n  \n    if not num_epochs is None:\n        col = hsv_to_rgb((1, 1.0*(idx)/num_epochs, 0.8)) #convert hsv values in a numpy array to rgb values all values assumed to be in range [0, 1].\n    else:\n        col = 'grey'\n\n    x_points = np.arange(sample_length) #Return evenly spaced values 
within a given interval.\n\n    nrow = int(n_samples/ncol)\n    fig, axarr = plt.subplots(nrow, ncol, sharex=True, figsize=(6, 6))\n    for m in range(nrow):\n        for n in range(ncol):\n            # first column\n            sample = samples[n*nrow + m, :, :]\n            axarr[m, n].plot(x_points, sample, color=col)\n            # axarr[m, n].set_ylim(-1, 1)\n    for n in range(ncol):\n        axarr[-1, n].xaxis.set_ticks(range(0, sample_length, int(sample_length/4)))\n    fig.suptitle(idx)\n    fig.subplots_adjust(hspace = 0.15)\n    fig.savefig(\"./experiments/plots/gs/\" + identifier + \"_epoch\" + str(idx).zfill(4) + \".png\")\n    plt.clf()\n    plt.close()\n    return\n\ndef save_plot_interpolate(input_samples, samples, idx, identifier,  num_epochs=None, distances=None, sigma=1):\n    \"\"\" very boilerplate, unsure how to make nicer \"\"\"\n    n_samples = samples.shape[0]\n    sample_length = samples.shape[1]\n  \n    if not num_epochs is None:\n        col = hsv_to_rgb((1, 1.0*(idx)/num_epochs, 0.8))\n    else:\n        col = 'grey'\n\n    x_points = np.arange(sample_length)\n    if distances is None:\n        nrow = n_samples\n    else:\n        nrow = n_samples + 1\n    ncol = 1\n    fig, axarr = plt.subplots(nrow, ncol, figsize=(3, 9))\n    if distances is None:\n        startat = 0\n    else:\n        startat = 1\n        axarr[0].plot(distances.dA, color='green', label='distance from A', linestyle='--', marker='o', markersize=4)\n        axarr[0].plot(distances.dB, color='orange', label='distance from B', linestyle='dotted', marker='o', markersize=4)\n        axarr[0].get_xaxis().set_visible(False)\n        axarr[0].set_title('distance from endpoints')\n    for m in range(startat, nrow):\n        sample = samples[m-startat, :, 0]\n        axarr[m].plot(x_points, sample, color=col)\n    for m in range(startat, nrow):\n        axarr[m].set_ylim(-1.1, 1.1)\n        axarr[m].set_xlim(0, sample_length)\n        
axarr[m].spines[\"top\"].set_visible(False)\n        axarr[m].spines[\"bottom\"].set_visible(False)\n        axarr[m].spines[\"right\"].set_visible(False)\n        axarr[m].spines[\"left\"].set_visible(False)\n        axarr[m].tick_params(bottom='off', left='off')\n        axarr[m].get_xaxis().set_visible(False)\n        axarr[m].get_yaxis().set_visible(False)\n        axarr[m].set_facecolor((0.96, 0.96, 0.96))\n    if not input_samples is None:\n        # now do the real samples\n        axarr[startat].plot(x_points, input_samples[0], color='green', linestyle='--')\n        axarr[-1].plot(x_points, input_samples[1], color='green', linestyle='--')\n    \n    axarr[-1].xaxis.set_ticks(range(0, sample_length, int(sample_length/4)))\n    fig.suptitle(idx)\n    fig.subplots_adjust(hspace = 0.2)\n    fig.savefig(\"./experiments/plots/\" + identifier + \"_interpolate.png\")\n    fig.savefig(\"./experiments/plots/\" + identifier + \"_interpolate.pdf\")\n    plt.clf()\n    plt.close()\n    return\n\ndef reconstruction_errors(identifier, train_errors, vali_errors, \n                          generated_errors, random_errors):\n    \"\"\"\n    Plot two histogram of the reconstruction errors.\n    \"\"\"\n    print(identifier)\n    fig, axarr = plt.subplots(4, 1, sharex=True, figsize=(4, 8))\n    axarr[0].hist(train_errors, normed=1, color='green', bins=50)\n    axarr[0].set_title(\"train reconstruction errors\")\n    axarr[1].hist(vali_errors, normed=1, color='blue', bins=50)\n    axarr[1].set_title('vali reconstruction errors')\n    axarr[2].hist(generated_errors, normed=1, color='pink', bins=50)\n    axarr[2].set_title('generated reconstruction errors')\n    axarr[3].hist(random_errors, normed=1, color='grey', bins=50)\n    axarr[3].set_title('random reconstruction errors')\n    for ax in axarr:\n        ax.spines[\"top\"].set_visible(False)\n        ax.spines[\"bottom\"].set_visible(False)\n        ax.spines[\"right\"].set_visible(False)\n        
ax.spines[\"left\"].set_visible(False)\n        ax.tick_params(bottom='off', left='off')\n        ax.get_xaxis().set_visible(False)\n        ax.get_yaxis().set_visible(False)\n    axarr[3].set_xlim(0, 0.05)\n    plt.tight_layout()\n    plt.savefig('./experiments/plots/' + identifier + '_reconstruction_errors.png')\n    return True\n\ndef save_plot_reconstruct(real_samples, model_samples, identifier):\n    assert real_samples.shape == model_samples.shape\n    sample_length = real_samples.shape[1]\n    x_points = np.arange(sample_length)\n    nrow = real_samples.shape[0]\n    ncol = 2\n    fig, axarr = plt.subplots(nrow, ncol, sharex=True, figsize=(6, 6))\n    for m in range(nrow):\n        real_sample = real_samples[m, :, 0]\n        model_sample = model_samples[m, :, 0]\n        axarr[m, 0].plot(x_points, real_sample, color='green')\n        axarr[m, 1].plot(x_points, model_sample, color='red')\n    axarr[-1, 0].xaxis.set_ticks(range(0, sample_length, int(sample_length/4)))\n    axarr[-1, 1].xaxis.set_ticks(range(0, sample_length, int(sample_length/4)))\n    axarr[0, 0].set_title('real')\n    axarr[0, 1].set_title('reconstructed')\n    fig.subplots_adjust(hspace = 0.15)\n    fig.savefig(\"./experiments/plots/\" + identifier + \"_reconstruct.png\")\n    plt.clf()\n    plt.close()\n    return\n\ndef save_plot_vary_dimension(samples_list, idx, identifier, n_dim):\n    \"\"\"\n    \"\"\"\n    assert len(samples_list) == n_dim\n    sample_length = samples_list[0].shape[1]\n  \n    x_points = np.arange(sample_length)\n\n    nrow = samples_list[0].shape[0]\n    sidelength = n_dim*1.5\n    fig, axarr = plt.subplots(nrow, n_dim, sharex=True, sharey=True, figsize=(sidelength, sidelength))\n    for dim in range(n_dim):\n        sample_dim = samples_list[dim]\n        axarr[0, dim].set_title(dim)\n        h = dim*1.0/n_dim       # hue\n        for n in range(nrow):\n            sample = sample_dim[n, :, 0]\n            axarr[n, dim].plot(x_points, sample, color='black')\n      
      axarr[n, dim].spines[\"top\"].set_visible(False)\n            axarr[n, dim].spines[\"bottom\"].set_visible(False)\n            axarr[n, dim].spines[\"right\"].set_visible(False)\n            axarr[n, dim].spines[\"left\"].set_visible(False)\n            axarr[n, dim].tick_params(bottom='off', left='off')\n            axarr[n, dim].get_xaxis().set_visible(False)\n            axarr[n, dim].set_facecolor(hsv_to_rgb((h, 0 + 0.25*n/nrow, 0.96)))\n        axarr[-1, dim].xaxis.set_ticks(range(0, sample_length, int(sample_length/4)))\n    fig.suptitle(idx)\n    fig.subplots_adjust(hspace = 0.11, wspace=0.11)\n    fig.savefig(\"./experiments/plots/\" + identifier + \"_epoch\" + str(idx).zfill(4) + \".png\")\n    plt.clf()\n    plt.close()\n    return True\n\ndef interpolate(sampleA, sampleB=None, n_steps=6):\n    \"\"\"\n    Plot the linear interpolation between two latent space points.\n    \"\"\"\n    weights = np.linspace(0, 1, n_steps)\n    if sampleB is None:\n        # do it \"close by\"\n        sampleB = sampleA + np.random.normal(size=sampleA.shape, scale=0.05)\n    samples = np.array([w*sampleB + (1-w)*sampleA for w in weights])\n    return samples\n\ndef vary_latent_dimension(sample, dimension, n_steps=6):\n    \"\"\"\n    \"\"\"\n    assert dimension <= sample.shape[1]\n    scale = np.mean(np.abs(sample[:, dimension]))\n    deviations = np.linspace(0, 2*scale, n_steps)\n    samples = np.array([sample[:, :]]*n_steps)\n    for n in range(n_steps):\n        samples[n, :, dimension] += deviations[n]\n    return samples\n\ndef plot_sine_evaluation(real_samples, fake_samples, idx, identifier):\n    \"\"\" \n    Create histogram of fake (generated) samples frequency, amplitude distribution.\n    Also for real samples.\n    \"\"\"\n    ### frequency\n    seq_length = len(real_samples[0])    # assumes samples are all the same length\n    frate = seq_length\n    freqs_hz = np.fft.rfftfreq(seq_length)*frate      # this is for labelling the plot\n    # TODO, just 
taking axis 0 for now...\n    w_real = np.mean(np.abs(np.fft.rfft(real_samples[:, :, 0])), axis=0)\n    w_fake = np.mean(np.abs(np.fft.rfft(fake_samples[:, :, 0])), axis=0)\n    ### amplitude\n    A_real = np.max(np.abs(real_samples[:, :, 0]), axis=1) \n    A_fake = np.max(np.abs(fake_samples[:, :, 0]), axis=1) \n    ### now plot\n    nrow = 2\n    ncol = 2\n    fig, axarr = plt.subplots(nrow, ncol, sharex='col', figsize=(6, 6))\n    # freq\n    axarr[0, 0].vlines(freqs_hz, ymin=np.minimum(np.zeros_like(w_real), w_real), ymax=np.maximum(np.zeros_like(w_real), w_real), color='#30ba50')\n    axarr[0, 0].set_title(\"frequency\", fontsize=16)\n    axarr[0, 0].set_ylabel(\"real\", fontsize=16)\n    axarr[1, 0].vlines(freqs_hz, ymin=np.minimum(np.zeros_like(w_fake), w_fake), ymax=np.maximum(np.zeros_like(w_fake), w_fake), color='#ba4730')\n    axarr[1, 0].set_ylabel(\"generated\", fontsize=16)\n    # amplitude\n    axarr[0, 1].hist(A_real, normed=True, color='#30ba50', bins=30)\n    axarr[0, 1].set_title(\"amplitude\", fontsize=16)\n    axarr[1, 1].hist(A_fake, normed=True, color='#ba4730', bins=30)\n   \n    fig.savefig('./experiments/plots/' + identifier + '_eval' + str(idx).zfill(4) +'.png')\n    plt.clf()\n    plt.close()\n    return True\n\ndef plot_trace(identifier, xmax=250, final=False, dp=False):\n    \"\"\"\n    \"\"\"\n\n    trace_path = './experiments/traces/' + identifier + '.trace.txt'\n    da = read_table(trace_path, sep=' ')\n    nrow = 3\n    if dp:\n        trace_dp_path = './experiments/traces/' + identifier + '.dptrace.txt'\n        da_dp = read_table(trace_dp_path, sep=' ')\n        nrow += 1\n    \n    ncol=1  \n    fig, axarr = plt.subplots(nrow, ncol, sharex='col', figsize=(6, 6))\n\n    # D_loss\n    d_handle,  = axarr[0].plot(da.epoch, da.D_loss, color='red', label='discriminator')\n    axarr[0].set_ylabel('D loss')\n#    axarr[0].set_ylim(0.9, 1.6)\n    if final:\n        #D_ticks = [1.0, 1.2, 1.5]\n        D_ticks = [0.5, 1.0, 1.5]\n        
axarr[0].get_yaxis().set_ticks(D_ticks)\n        for tick in D_ticks:\n            axarr[0].plot((-10, xmax+10), (tick, tick), ls='dotted', lw=0.5, color='black', alpha=0.4, zorder=0)\n    # G loss\n    ax_G = axarr[0].twinx()\n    g_handle,  = ax_G.plot(da.epoch, da.G_loss, color='green', ls='dashed', label='generator')\n    ax_G.set_ylabel('G loss')\n    if final:\n        G_ticks = [2.5, 5]\n        ax_G.get_yaxis().set_ticks(G_ticks)\n#        for tick in G_ticks:\n#            axarr[0].plot((-10, xmax+10), (tick, tick), ls='dotted', lw=0.5, color='green', alpha=1.0, zorder=0)\n\n    ax_G.spines[\"top\"].set_visible(False)\n    ax_G.spines[\"bottom\"].set_visible(False)\n    ax_G.spines[\"right\"].set_visible(False)\n    ax_G.spines[\"left\"].set_visible(False)\n    ax_G.tick_params(bottom='off', right='off')\n    axarr[0].legend(handles=[d_handle, g_handle], labels=['discriminator', 'generator'])\n\n    # mmd\n    da_mmd = da.loc[:, ['epoch', 'mmd2']].dropna()\n    axarr[1].plot(da_mmd.epoch, da_mmd.mmd2, color='purple')\n    axarr[1].set_ylabel('MMD$^2$')\n    #axarr[1].set_ylim(0.0, 0.04)\n\n    #ax_that = axarr[1].twinx()\n    #ax_that.plot(da.that)\n    #ax_that.set_ylabel('$\\hat{t}$')\n    #ax_that.set_ylim(0, 50)\n    if final:\n        mmd_ticks = [0.01, 0.02, 0.03]\n        axarr[1].get_yaxis().set_ticks(mmd_ticks)\n        for tick in mmd_ticks:\n            axarr[1].plot((-10, xmax+10), (tick, tick), ls='dotted', lw=0.5, color='black', alpha=0.4, zorder=0)\n\n    # log likelihood\n    da_ll = da.loc[:, ['epoch', 'll', 'real_ll']].dropna()\n    axarr[2].plot(da_ll.epoch, da_ll.ll, color='orange')\n    axarr[2].plot(da_ll.epoch, da_ll.real_ll, color='orange', alpha=0.5)\n    axarr[2].set_ylabel('likelihood')\n    axarr[2].set_xlabel('epoch')\n    axarr[2].set_ylim(-750, 100)\n    #axarr[2].set_ylim(-10000000, 500)\n    if final:\n#        ll_ticks = [-1.0*1e7, -0.5*1e7, 0]\n        ll_ticks = [-500 ,-250, 0]\n        
axarr[2].get_yaxis().set_ticks(ll_ticks)\n        for tick in ll_ticks:\n            axarr[2].plot((-10, xmax+10), (tick, tick), ls='dotted', lw=0.5, color='black', alpha=0.4, zorder=0)\n   \n    if dp:\n        assert da_dp.columns[0] == 'epoch'\n        epochs = da_dp['epoch']\n        eps_values = da_dp.columns[1:]\n        for eps_string in eps_values:\n            if 'eps' in eps_string:\n                eps = eps_string[3:]\n            else:\n                eps = eps_string\n            deltas = da_dp[eps_string]\n            axarr[3].plot(epochs, deltas, label=eps)\n            axarr[3].set_ylabel('delta')\n            axarr[3].set_xlabel('epoch')\n        axarr[3].legend()\n\n    # beautify\n    for ax in axarr:\n        #ax.spines[\"top\"].set_visible(True)\n        ax.spines[\"top\"].set_color((0, 0, 0, 0.3))\n        #ax.spines[\"bottom\"].set_visible(False)\n        ax.spines[\"bottom\"].set_color((0, 0, 0, 0.3))\n        #ax.spines[\"right\"].set_visible(False)\n        ax.spines[\"right\"].set_color((0, 0, 0, 0.3))\n        #ax.spines[\"left\"].set_visible(False)\n        ax.spines[\"left\"].set_color((0, 0, 0, 0.3))\n        ax.tick_params(bottom='off', left='off')\n        # make background grey\n    #    ax.set_facecolor((0.96, 0.96, 0.96))\n        ymin, ymax = ax.get_ylim()\n        for x in np.arange(0, xmax+10, 10):\n            ax.plot((x, x), (ymin, ymax), ls='dotted', lw=0.5, color='black', alpha=0.40, zorder=0)\n        ax.set_xlim(-5, xmax)\n        ax.get_yaxis().set_label_coords(-0.11,0.5)\n\n    # bottom one\n\n    fig.savefig('./experiments/traces/' + identifier + '_trace.png')\n    fig.savefig('./experiments/traces/' + identifier + '_trace.pdf')\n    plt.clf()\n    plt.close()\n    return True\n\n\ndef save_samples(vis_sample, identifier, epoch):\n\n    np.save('./experiments/plots/gs/' + identifier + '_gs_%s.npy' % epoch, vis_sample)\n\n    return True\n\ndef save_samples_real(vis_real, identifier):\n\n    
np.save('./experiments/plots/gs/' + identifier + '_gs_real.npy',  vis_real)\n\n    return True\n\ndef save_mnist_plot_sample(samples, idx, identifier, n_samples, labels=None):\n    \"\"\"\n    Generates a grid showing mnist digits.\n\n    \"\"\"\n    assert n_samples <= samples.shape[0]\n    if not labels is None:\n        assert n_samples <= len(labels)\n        if len(labels.shape) > 1 and not labels.shape[1] == 1:\n            # one-hot\n            label_titles = np.argmax(labels, axis=1)\n        else:\n            label_titles = labels\n    else:\n        label_titles = ['NA']*n_samples\n    assert n_samples % 2 == 0\n    img_size = int(np.sqrt(samples.shape[1]))\n    \n    nrow = int(n_samples/2)\n    ncol = 2\n    fig, axarr = plt.subplots(nrow, ncol, sharex=True, figsize=(8, 8))\n    for m in range(nrow):\n        # first column\n        sample = samples[m, :, 0]\n        axarr[m, 0].imshow(sample.reshape([img_size,img_size]), cmap='gray')            \n        axarr[m, 0].set_title(str(label_titles[m]))\n        # second column\n        sample = samples[nrow + m, :, 0]\n        axarr[m, 1].imshow(sample.reshape([img_size,img_size]), cmap='gray')           \n        axarr[m, 1].set_title(str(label_titles[m + nrow]))\n    fig.suptitle(idx)\n    fig.suptitle(idx)\n    fig.subplots_adjust(hspace = 0.15)\n    fig.savefig(\"./experiments/plots/\" + identifier + \"_epoch\" + str(idx).zfill(4) + \".png\")\n    plt.clf()\n    plt.close()\n    return\n\ndef visualise_latent(Z, identifier):\n    \"\"\"\n    visualise a SINGLE point in the latent space\n    \"\"\"\n    seq_length = Z.shape[0]\n    latent_dim = Z.shape[1]\n    if latent_dim > 2: \n        print('WARNING: Only visualising first two dimensions of latent space.')\n    h = np.random.random()\n    colours = np.array([hsv_to_rgb((h, i/seq_length, 0.96)) for i in range(seq_length)])\n#    plt.plot(Z[:, 0], Z[:, 1], c='grey', alpha=0.5)\n    for i in range(seq_length):\n        plt.scatter(Z[i, 0], Z[i, 1], 
marker='o', c=colours[i])\n    plt.savefig('./experiments/plots/' + identifier + '_Z.png')\n    plt.clf()\n    plt.close()\n    return True\n\n\n# --- to do with the model --- #\ndef plot_parameters(parameters, identifier):\n    \"\"\"\n    visualise the parameters of a GAN\n    \"\"\"\n    generator_out = parameters['generator/W_out_G:0']\n    generator_weights = parameters['generator/rnn/lstm_cell/weights:0'] # split this into four\n    generator_matrices = np.split(generator_weights, 4, 1)\n    fig, axarr = plt.subplots(5, 1, sharex=True, \n            gridspec_kw = {'height_ratios':[0.2, 1, 1, 1, 1]}, figsize=(3,13))\n\n    axarr[0].matshow(generator_out.T, extent=[0,100,0,100])\n    axarr[0].set_title('W_out_G')\n    axarr[1].matshow(generator_matrices[0])\n    axarr[1].set_title('LSTM weights (1)')\n    axarr[2].matshow(generator_matrices[1])\n    axarr[2].set_title('LSTM weights (2)')\n    axarr[3].matshow(generator_matrices[2])\n    axarr[3].set_title('LSTM weights (3)')\n    axarr[4].matshow(generator_matrices[3])\n    axarr[4].set_title('LSTM weights (4)')\n    for a in axarr:\n        a.set_xlim(0, 100)\n        a.set_ylim(0, 100)\n        a.spines[\"top\"].set_visible(False)\n        a.spines[\"bottom\"].set_visible(False)\n        a.spines[\"right\"].set_visible(False)\n        a.spines[\"left\"].set_visible(False)\n        a.get_xaxis().set_visible(False)\n        a.get_yaxis().set_visible(False)\n#        a.tick_params(bottom='off', left='off', top='off')\n    plt.tight_layout()\n    plt.savefig('./experiments/plots/' + identifier + '_weights.png')\n    return True\n\n### TSTR ###\ndef view_mnist_eval(identifier, train_X, train_Y, synth_X, synth_Y, test_X, test_Y, synth_predY, real_predY):\n    \"\"\"\n    Basically just \n    http://scikit-learn.org/stable/auto_examples/classification/plot_digits_classification.html\n    \"\"\"\n    # resize everything\n    side_length = int(np.sqrt(train_X.shape[1]))\n    train_X = train_X.reshape(-1, side_length, 
side_length)\n    synth_X = synth_X.reshape(-1, side_length, side_length)\n    test_X = test_X.reshape(-1, side_length, side_length)\n    # remember, they're wrecked in the outer function thanks to python\n    synth_images_and_labels = list(zip(synth_X, synth_Y))\n    for index, (image, label) in enumerate(synth_images_and_labels[:4]):\n        plt.subplot(4, 4, index + 1)\n        plt.axis('off')\n        plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')\n        if index == 0:\n            plt.title('synth train: %i' % label)\n        else:\n            plt.title('%i' % label)\n    train_images_and_labels = list(zip(train_X, train_Y))\n    for index, (image, label) in enumerate(train_images_and_labels[:4]):\n        plt.subplot(4, 4, index + 5)\n        plt.axis('off')\n        plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')\n        if index == 0:\n            plt.title('real train: %i' % label)\n        else:\n            plt.title('%i' % label)\n    images_and_synthpreds = list(zip(test_X, synth_predY))\n    for index, (image, prediction) in enumerate(images_and_synthpreds[:4]):\n        plt.subplot(4, 4, index + 9)\n        plt.axis('off')\n        plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')\n        if index == 0:\n            plt.title('synth pred: %i' % prediction)\n        else:\n            plt.title('%i' % prediction)\n    images_and_realpreds = list(zip(test_X, real_predY))\n    for index, (image, prediction) in enumerate(images_and_realpreds[:4]):\n        plt.subplot(4, 4, index + 13)\n        plt.axis('off')\n        plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')\n        if index == 0:\n            plt.title('real pred: %i' % prediction)\n        else:\n            plt.title('%i' % prediction)\n    plt.tight_layout()\n    plt.title(identifier)\n    plt.savefig('./experiments/tstr/' + identifier + '_preds.png')\n    return True\n\n# --- nips !!! 
--- #\ndef nips_plot_rbf(sample, index, which='train'):\n    if which == 'train':\n#        col = '#167ea0'\n        col = '#13af5f'\n    else:\n        col = 'black'\n    sample_length = len(sample)\n    sample = sample.reshape(sample_length)\n    x_points = np.arange(sample_length)\n    fig, axarr = plt.subplots(1, 1, figsize=(2, 2))\n    axarr.set_facecolor((0.95, 0.96, 0.96))\n    axarr.plot(x_points, sample, color=col)\n    axarr.set_ylim(-1.5, 1.5)\n    axarr.get_xaxis().set_visible(False)\n    axarr.get_yaxis().set_visible(False)\n    axarr.spines[\"top\"].set_visible(False)\n    axarr.spines[\"bottom\"].set_visible(False)\n    axarr.spines[\"right\"].set_visible(False)\n    axarr.spines[\"left\"].set_visible(False)\n    axarr.tick_params(bottom='off', left='off')\n    plt.savefig('./plots/NIPS_rbf_' + which + '_' + str(index) + '.png')\n    plt.savefig('./plots/NIPS_rbf_' + which + '_' + str(index) + '.pdf')\n    plt.clf()\n    plt.close()\n    return True\n\ndef nips_plot_sine(sample, index, which='train'):\n    if which == 'train':\n        #col = '#167ea0'\n        #col = '#13af5f'\n        col = '#1188ad'\n    else:\n        col = 'black'\n    sample_length = len(sample)\n    sample = sample.reshape(sample_length)\n    sample_length = len(sample)\n    sample = sample.reshape(sample_length)\n    x_points = np.arange(sample_length)\n    fig, axarr = plt.subplots(1, 1, figsize=(2, 2))\n    axarr.set_facecolor((0.95, 0.96, 0.96))\n    axarr.plot(x_points, sample, color=col)\n    axarr.set_ylim(-1.1, 1.1)\n    axarr.get_xaxis().set_visible(False)\n    axarr.get_yaxis().set_visible(False)\n    axarr.spines[\"top\"].set_visible(False)\n    axarr.spines[\"bottom\"].set_visible(False)\n    axarr.spines[\"right\"].set_visible(False)\n    axarr.spines[\"left\"].set_visible(False)\n    axarr.tick_params(bottom='off', left='off')\n    plt.savefig('./plots/NIPS_sine_' + which + '_' + str(index) + '.png')\n    plt.savefig('./plots/NIPS_sine_' + which + '_' + 
str(index) + '.pdf')\n    plt.clf()\n    plt.close()\n    return True\n\ndef nips_plot_mnist(sample, index, which='train'):\n    plt.axis('off')\n    plt.imshow(sample, cmap=plt.cm.gray, interpolation='nearest')\n    plt.savefig('./plots/NIPS_mnist_' + which + '_' + str(index) + '.png')\n    plt.savefig('./plots/NIPS_mnist_' + which + '_' + str(index) + '.pdf')\n    plt.clf()\n    plt.close()\n    return True\n"
  },
  {
    "path": "tf_ops.py",
    "content": "### from https://github.com/eugenium/MMD/blob/master/tf_ops.py\nimport tensorflow as tf\n\n\ndef sq_sum(t, name=None):\n    \"The squared Frobenius-type norm of a tensor, sum(t ** 2).\"\n    with tf.name_scope(name, \"SqSum\", [t]):\n        t = tf.convert_to_tensor(t, name='t')\n        return 2 * tf.nn.l2_loss(t)\n\n\ndef dot(x, y, name=None):\n    \"The dot product of two vectors x and y.\"\n    with tf.name_scope(name, \"Dot\", [x, y]):\n        x = tf.convert_to_tensor(x, name='x')\n        y = tf.convert_to_tensor(y, name='y')\n\n        x.get_shape().assert_has_rank(1)\n        y.get_shape().assert_has_rank(1)\n\n        return tf.squeeze(tf.matmul(tf.expand_dims(x, 0), tf.expand_dims(y, 1)))\n"
  },
  {
    "path": "utils.py",
    "content": "#!/usr/bin/env ipython\n# Utility functions that don't fit in other scripts\nimport argparse\nimport json\n\ndef rgan_options_parser():\n    \"\"\"\n    Define parser to parse options from command line, with defaults.\n    Refer to this function for definitions of various variables.\n    \"\"\"\n    parser = argparse.ArgumentParser(description='Train a GAN to generate sequential, real-valued data.')\n    # meta-option\n    parser.add_argument('--settings_file', help='json file of settings, overrides everything else', type=str, default='')\n    # options pertaining to data\n    parser.add_argument('--data', help='what kind of data to train with?',\n            default='gp_rbf',\n            choices=['gp_rbf', 'sine', 'mnist', 'load'])\n    # parser.add_argument('--num_samples', type=int, help='how many training examples \\\n    #                 to generate?', default=28*5*100)\n    # parser.add_argument('--num_samples_t', type=int, help='how many testing examples \\\n    #                     for anomaly detection?', default=28 * 5 * 100)\n    parser.add_argument('--seq_length', type=int, default=30)\n    parser.add_argument('--num_signals', type=int, default=1)\n    parser.add_argument('--normalise', type=bool, default=False, help='normalise the \\\n            training/vali/test data (during split)?')\n    # parser.add_argument('--AD', type=bool, default=False, help='should we conduct anomaly detection?')\n\n    ### for gp_rbf\n    parser.add_argument('--scale', type=float, default=0.1)\n            ### for sin (should be using subparsers for this...)\n    parser.add_argument('--freq_low', type=float, default=1.0)\n    parser.add_argument('--freq_high', type=float, default=5.0)\n    parser.add_argument('--amplitude_low', type=float, default=0.1)\n    parser.add_argument('--amplitude_high', type=float, default=0.9)\n            ### for mnist\n    parser.add_argument('--multivariate_mnist', type=bool, default=False)\n    
parser.add_argument('--full_mnist', type=bool, default=False)\n            ### for loading\n    parser.add_argument('--data_load_from', type=str, default='')\n            ### for eICU\n    parser.add_argument('--resample_rate_in_min', type=int, default=15)\n    # hyperparameters of the model\n    parser.add_argument('--hidden_units_g', type=int, default=100)\n    parser.add_argument('--hidden_units_d', type=int, default=100)\n    parser.add_argument('--hidden_units_e', type=int, default=100)\n    parser.add_argument('--kappa', type=float, help='weight between final output \\\n            and intermediate steps in discriminator cost (1 = all \\\n            intermediate', default=1)\n    parser.add_argument('--latent_dim', type=int, default=5, help='dimensionality \\\n            of the latent/noise space')\n    parser.add_argument('--weight', type=int, default=0.5, help='weight of score')\n    parser.add_argument('--degree', type=int, default=1, help='norm degree')\n    parser.add_argument('--batch_mean', type=bool, default=False, help='append the mean \\\n            of the batch to all variables for calculating discriminator loss')\n    parser.add_argument('--learn_scale', type=bool, default=False, help='make the \\\n            \"scale\" parameter at the output of the generator learnable (else fixed \\\n            to 1')\n    # options pertaining to training\n    parser.add_argument('--learning_rate', type=float, default=0.1)\n    parser.add_argument('--batch_size', type=int, default=28)\n    parser.add_argument('--num_epochs', type=int, default=100)\n    parser.add_argument('--D_rounds', type=int, default=5, help='number of rounds \\\n            of discriminator training')\n    parser.add_argument('--G_rounds', type=int, default=1, help='number of rounds \\\n            of generator training')\n    parser.add_argument('--E_rounds', type=int, default=1, help='number of rounds \\\n               of encoder training')\n    # parser.add_argument('--use_time', 
type=bool, default=False, help='enforce \\\n    #         latent dimension 0 to correspond to time')\n    parser.add_argument('--shuffle', type=bool, default=True)\n    parser.add_argument('--eval_mul', type=bool, default=False)\n    parser.add_argument('--eval_an', type=bool, default=False)\n    parser.add_argument('--eval_single', type=bool, default=False)\n    parser.add_argument('--wrong_labels', type=bool, default=False, help='augment \\\n            discriminator loss with real examples with wrong (~shuffled, sort of) labels')\n    # options pertaining to evaluation and exploration\n    parser.add_argument('--identifier', type=str, default='test', help='identifier \\\n            string for output files')\n    parser.add_argument('--sub_id', type=str, default='test', help='identifier \\\n               string for load parameters')\n    # options pertaining to differential privacy\n    parser.add_argument('--dp', type=bool, default=False, help='train discriminator \\\n            with differentially private SGD?')\n    parser.add_argument('--l2norm_bound', type=float, default=1e-5,\n            help='bound on norm of individual gradients for DP training')\n    parser.add_argument('--batches_per_lot', type=int, default=1,\n            help='number of batches per lot (for DP)')\n    parser.add_argument('--dp_sigma', type=float, default=1e-5,\n            help='sigma for noise added (for DP)')\n\n    return parser\n\ndef load_settings_from_file(settings):\n    \"\"\"\n    Handle loading settings from a JSON file, filling in missing settings from\n    the command line defaults, but otherwise overwriting them.\n    \"\"\"\n    settings_path = './experiments/settings/' + settings['settings_file'] + '.txt'\n    print('Loading settings from', settings_path)\n    settings_loaded = json.load(open(settings_path, 'r'))\n    # check for settings missing in file\n    for key in settings.keys():\n        if not key in settings_loaded:\n            print(key, 'not found in 
loaded settings - adopting value from command line defaults: ', settings[key])\n            # overwrite parsed/default settings with those read from file, allowing for\n    # (potentially new) default settings not present in file\n    settings.update(settings_loaded)\n    return settings\n"
  }
]