[
  {
    "path": "README.md",
    "content": "# Membership Inference Attack against Machine Learning Models\nThis repository contains examples of experiments for the paper Membership Inference Attack against Machine Learning Models (http://ieeexplore.ieee.org/document/7958568/). \n\n### Attack Experiment\n\npython attack.py train_feat_file train_label_file\n\n**train_feat_file** should be a text file where each line is a feature vector with floating point values separated by commas. \n**train_label_file** should be a text file where each line is a label with an integer value. \n\nOnce data is loaded, we will split the data for training the target model and the shadow models and save the split to disk. Then we will train the target model as well as the shadow models. Finally, the attack model can be trained with predictions from the shadow models and tested on the target model. \n"
  },
  {
    "path": "attack.py",
    "content": "from classifier import train as train_model, iterate_minibatches, load_dataset\r\nfrom sklearn.model_selection import train_test_split\r\nfrom sklearn.metrics import classification_report, accuracy_score\r\nimport numpy as np\r\nimport theano.tensor as T\r\nimport lasagne\r\nimport theano\r\nimport argparse\r\nimport os\r\nimport imp\r\nnp.random.seed(21312)\r\nMODEL_PATH = './model/'\r\nDATA_PATH = './data/'\r\n\r\nimport theano.gof.compiledir as cd\r\ncd.print_compiledir_content()\r\n\r\nif not os.path.exists(MODEL_PATH):\r\n    os.makedirs(MODEL_PATH)\r\n\r\nif not os.path.exists(DATA_PATH):\r\n    os.makedirs(DATA_PATH)\r\n\r\n\r\ndef load_trained_indices():\r\n    fname = MODEL_PATH + 'data_indices.npz'\r\n    with np.load(fname) as f:\r\n        indices = [f['arr_%d' % i] for i in range(len(f.files))]\r\n    return indices\r\n\r\n\r\ndef get_data_indices(data_size, target_train_size=int(1e4), sample_target_data=True):\r\n    train_indices = np.arange(data_size)\r\n    if sample_target_data:\r\n        target_data_indices = np.random.choice(train_indices, target_train_size, replace=False)\r\n        shadow_indices = np.setdiff1d(train_indices, target_data_indices)\r\n    else:\r\n        target_data_indices = train_indices[:target_train_size]\r\n        shadow_indices = train_indices[target_train_size:]\r\n    return target_data_indices, shadow_indices\r\n\r\n\r\ndef load_attack_data():\r\n    fname = MODEL_PATH + 'attack_train_data.npz'\r\n    with np.load(fname) as f:\r\n        train_x, train_y = [f['arr_%d' % i] for i in range(len(f.files))]\r\n    fname = MODEL_PATH + 'attack_test_data.npz'\r\n    with np.load(fname) as f:\r\n        test_x, test_y = [f['arr_%d' % i] for i in range(len(f.files))]\r\n    return train_x.astype('float32'), train_y.astype('int32'), test_x.astype('float32'), test_y.astype('int32')\r\n\r\n\r\ndef train_target_model(dataset, epochs=100, batch_size=100, learning_rate=0.01, l2_ratio=1e-7,\r\n                       
n_hidden=50, model='nn', save=True):\r\n    train_x, train_y, test_x, test_y = dataset\r\n    output_layer = train_model(dataset, n_hidden=n_hidden, epochs=epochs, learning_rate=learning_rate,\r\n                               batch_size=batch_size, model=model, l2_ratio=l2_ratio)\r\n    # test data for attack model\r\n    attack_x, attack_y = [], []\r\n    input_var = T.matrix('x')\r\n    prob = lasagne.layers.get_output(output_layer, input_var, deterministic=True)\r\n    prob_fn = theano.function([input_var], prob)\r\n    # data used in training, label is 1\r\n    for batch in iterate_minibatches(train_x, train_y, batch_size, False):\r\n        attack_x.append(prob_fn(batch[0]))\r\n        attack_y.append(np.ones(batch_size))\r\n    # data not used in training, label is 0\r\n    for batch in iterate_minibatches(test_x, test_y, batch_size, False):\r\n        attack_x.append(prob_fn(batch[0]))\r\n        attack_y.append(np.zeros(batch_size))\r\n\r\n    attack_x = np.vstack(attack_x)\r\n    attack_y = np.concatenate(attack_y)\r\n    attack_x = attack_x.astype('float32')\r\n    attack_y = attack_y.astype('int32')\r\n\r\n    if save:\r\n        np.savez(MODEL_PATH + 'attack_test_data.npz', attack_x, attack_y)\r\n        np.savez(MODEL_PATH + 'target_model.npz', *lasagne.layers.get_all_param_values(output_layer))\r\n\r\n    classes = np.concatenate([train_y, test_y])\r\n    return attack_x, attack_y, classes\r\n\r\n\r\ndef train_shadow_models(n_hidden=50, epochs=100, n_shadow=20, learning_rate=0.05, batch_size=100, l2_ratio=1e-7,\r\n                        model='nn', save=True):\r\n    # for getting probabilities\r\n    input_var = T.matrix('x')\r\n    # for attack model\r\n    attack_x, attack_y = [], []\r\n    classes = []\r\n    for i in xrange(n_shadow):\r\n        print 'Training shadow model {}'.format(i)\r\n        data = load_data('shadow{}_data.npz'.format(i))\r\n        train_x, train_y, test_x, test_y = data\r\n        # train model\r\n        output_layer 
= train_model(data, n_hidden=n_hidden, epochs=epochs, learning_rate=learning_rate,\r\n                                   batch_size=batch_size, model=model, l2_ratio=l2_ratio)\r\n        prob = lasagne.layers.get_output(output_layer, input_var, deterministic=True)\r\n        prob_fn = theano.function([input_var], prob)\r\n        print 'Gather training data for attack model'\r\n        attack_i_x, attack_i_y = [], []\r\n        # data used in training, label is 1\r\n        for batch in iterate_minibatches(train_x, train_y, batch_size, False):\r\n            attack_i_x.append(prob_fn(batch[0]))\r\n            attack_i_y.append(np.ones(batch_size))\r\n        # data not used in training, label is 0\r\n        for batch in iterate_minibatches(test_x, test_y, batch_size, False):\r\n            attack_i_x.append(prob_fn(batch[0]))\r\n            attack_i_y.append(np.zeros(batch_size))\r\n        attack_x += attack_i_x\r\n        attack_y += attack_i_y\r\n        classes.append(np.concatenate([train_y, test_y]))\r\n    # train data for attack model\r\n    attack_x = np.vstack(attack_x)\r\n    attack_y = np.concatenate(attack_y)\r\n    attack_x = attack_x.astype('float32')\r\n    attack_y = attack_y.astype('int32')\r\n    classes = np.concatenate(classes)\r\n    if save:\r\n        np.savez(MODEL_PATH + 'attack_train_data.npz', attack_x, attack_y)\r\n\r\n    return attack_x, attack_y, classes\r\n\r\n\r\ndef train_attack_model(classes, dataset=None, n_hidden=50, learning_rate=0.01, batch_size=200, epochs=50,\r\n                       model='nn', l2_ratio=1e-7):\r\n    if dataset is None:\r\n        dataset = load_attack_data()\r\n\r\n    train_x, train_y, test_x, test_y = dataset\r\n\r\n    train_classes, test_classes = classes\r\n    train_indices = np.arange(len(train_x))\r\n    test_indices = np.arange(len(test_x))\r\n    unique_classes = np.unique(train_classes)\r\n\r\n    true_y = []\r\n    pred_y = []\r\n    for c in unique_classes:\r\n        print 'Training attack 
model for class {}...'.format(c)\r\n        c_train_indices = train_indices[train_classes == c]\r\n        c_train_x, c_train_y = train_x[c_train_indices], train_y[c_train_indices]\r\n        c_test_indices = test_indices[test_classes == c]\r\n        c_test_x, c_test_y = test_x[c_test_indices], test_y[c_test_indices]\r\n        c_dataset = (c_train_x, c_train_y, c_test_x, c_test_y)\r\n        c_pred_y = train_model(c_dataset, n_hidden=n_hidden, epochs=epochs, learning_rate=learning_rate,\r\n                               batch_size=batch_size, model=model, rtn_layer=False, l2_ratio=l2_ratio)\r\n        true_y.append(c_test_y)\r\n        pred_y.append(c_pred_y)\r\n\r\n    print '-' * 10 + 'FINAL EVALUATION' + '-' * 10 + '\\n'\r\n    true_y = np.concatenate(true_y)\r\n    pred_y = np.concatenate(pred_y)\r\n    print 'Testing Accuracy: {}'.format(accuracy_score(true_y, pred_y))\r\n    print classification_report(true_y, pred_y)\r\n\r\n\r\ndef save_data():\r\n    print '-' * 10 + 'SAVING DATA TO DISK' + '-' * 10 + '\\n'\r\n\r\n    x, y, test_x, test_y = load_dataset(args.train_feat, args.train_label, args.test_feat, args.test_label)\r\n    if test_x is None:\r\n        print 'Splitting train/test data with ratio {}/{}'.format(1 - args.test_ratio, args.test_ratio)\r\n        x, test_x, y, test_y = train_test_split(x, y, test_size=args.test_ratio, stratify=y)\r\n\r\n    # need to partition target and shadow model data\r\n    assert len(x) > 2 * args.target_data_size\r\n\r\n    target_data_indices, shadow_indices = get_data_indices(len(x), target_train_size=args.target_data_size)\r\n    np.savez(MODEL_PATH + 'data_indices.npz', target_data_indices, shadow_indices)\r\n\r\n    # target model's data\r\n    print 'Saving data for target model'\r\n    train_x, train_y = x[target_data_indices], y[target_data_indices]\r\n    size = len(target_data_indices)\r\n    if size < len(test_x):\r\n        test_x = test_x[:size]\r\n        test_y = test_y[:size]\r\n    # save target 
data\r\n    np.savez(DATA_PATH + 'target_data.npz', train_x, train_y, test_x, test_y)\r\n\r\n    # shadow model's data\r\n    target_size = len(target_data_indices)\r\n    shadow_x, shadow_y = x[shadow_indices], y[shadow_indices]\r\n    shadow_indices = np.arange(len(shadow_indices))\r\n\r\n    for i in xrange(args.n_shadow):\r\n        print 'Saving data for shadow model {}'.format(i)\r\n        shadow_i_indices = np.random.choice(shadow_indices, 2 * target_size, replace=False)\r\n        shadow_i_x, shadow_i_y = shadow_x[shadow_i_indices], shadow_y[shadow_i_indices]\r\n        train_x, train_y = shadow_i_x[:target_size], shadow_i_y[:target_size]\r\n        test_x, test_y = shadow_i_x[target_size:], shadow_i_y[target_size:]\r\n        np.savez(DATA_PATH + 'shadow{}_data.npz'.format(i), train_x, train_y, test_x, test_y)\r\n\r\n\r\ndef load_data(data_name):\r\n    with np.load(DATA_PATH + data_name) as f:\r\n        train_x, train_y, test_x, test_y = [f['arr_%d' % i] for i in range(len(f.files))]\r\n    return train_x, train_y, test_x, test_y\r\n\r\n\r\ndef attack_experiment():\r\n    print '-' * 10 + 'TRAIN TARGET' + '-' * 10 + '\\n'\r\n    dataset = load_data('target_data.npz')\r\n    attack_test_x, attack_test_y, test_classes = train_target_model(\r\n        dataset=dataset,\r\n        epochs=args.target_epochs,\r\n        batch_size=args.target_batch_size,\r\n        learning_rate=args.target_learning_rate,\r\n        n_hidden=args.target_n_hidden,\r\n        l2_ratio=args.target_l2_ratio,\r\n        model=args.target_model,\r\n        save=args.save_model)\r\n\r\n    print '-' * 10 + 'TRAIN SHADOW' + '-' * 10 + '\\n'\r\n    attack_train_x, attack_train_y, train_classes = train_shadow_models(\r\n        epochs=args.target_epochs,\r\n        batch_size=args.target_batch_size,\r\n        learning_rate=args.target_learning_rate,\r\n        n_shadow=args.n_shadow,\r\n        n_hidden=args.target_n_hidden,\r\n        l2_ratio=args.target_l2_ratio,\r\n        
model=args.target_model,\r\n        save=args.save_model)\r\n\r\n    print '-' * 10 + 'TRAIN ATTACK' + '-' * 10 + '\\n'\r\n    dataset = (attack_train_x, attack_train_y, attack_test_x, attack_test_y)\r\n    train_attack_model(\r\n        dataset=dataset,\r\n        epochs=args.attack_epochs,\r\n        batch_size=args.attack_batch_size,\r\n        learning_rate=args.attack_learning_rate,\r\n        n_hidden=args.attack_n_hidden,\r\n        l2_ratio=args.attack_l2_ratio,\r\n        model=args.attack_model,\r\n        classes=(train_classes, test_classes))\r\n\r\n\r\nif __name__ == '__main__':\r\n    parser = argparse.ArgumentParser()\r\n    parser.add_argument('train_feat', type=str)\r\n    parser.add_argument('train_label', type=str)\r\n    parser.add_argument('--test_feat', type=str, default=None)\r\n    parser.add_argument('--test_label', type=str, default=None)\r\n    parser.add_argument('--save_model', type=int, default=0)\r\n    parser.add_argument('--save_data', type=int, default=0)\r\n    # if test not give, train test split configuration\r\n    parser.add_argument('--test_ratio', type=float, default=0.3)\r\n    # target and shadow model configuration\r\n    parser.add_argument('--n_shadow', type=int, default=10)\r\n    parser.add_argument('--target_data_size', type=int, default=int(1e4))   # number of data point used in target model\r\n    parser.add_argument('--target_model', type=str, default='nn')\r\n    parser.add_argument('--target_learning_rate', type=float, default=0.01)\r\n    parser.add_argument('--target_batch_size', type=int, default=100)\r\n    parser.add_argument('--target_n_hidden', type=int, default=50)\r\n    parser.add_argument('--target_epochs', type=int, default=50)\r\n    parser.add_argument('--target_l2_ratio', type=float, default=1e-6)\r\n\r\n    # attack model configuration\r\n    parser.add_argument('--attack_model', type=str, default='softmax')\r\n    parser.add_argument('--attack_learning_rate', type=float, default=0.01)\r\n    
parser.add_argument('--attack_batch_size', type=int, default=100)\r\n    parser.add_argument('--attack_n_hidden', type=int, default=50)\r\n    parser.add_argument('--attack_epochs', type=int, default=50)\r\n    parser.add_argument('--attack_l2_ratio', type=float, default=1e-6)\r\n\r\n    # parse configuration\r\n    args = parser.parse_args()\r\n    print vars(args)\r\n    if args.save_data:\r\n        save_data()\r\n    else:\r\n        attack_experiment()\r\n"
  },
  {
    "path": "classifier.py",
    "content": "from sklearn.metrics import classification_report, accuracy_score\r\nimport theano.tensor as T\r\nimport numpy as np\r\nimport lasagne\r\nimport theano\r\nimport argparse\r\n\r\n\r\ndef iterate_minibatches(inputs, targets, batch_size, shuffle=True):\r\n    assert len(inputs) == len(targets)\r\n    if shuffle:\r\n        indices = np.arange(len(inputs))\r\n        np.random.shuffle(indices)\r\n\r\n    start_idx = None\r\n    for start_idx in range(0, len(inputs) - batch_size + 1, batch_size):\r\n        if shuffle:\r\n            excerpt = indices[start_idx:start_idx + batch_size]\r\n        else:\r\n            excerpt = slice(start_idx, start_idx + batch_size)\r\n        yield inputs[excerpt], targets[excerpt]\r\n\r\n    if start_idx is not None and start_idx + batch_size < len(inputs):\r\n        excerpt = indices[start_idx + batch_size:] if shuffle else slice(start_idx + batch_size, len(inputs))\r\n        yield inputs[excerpt], targets[excerpt]\r\n\r\n\r\ndef get_nn_model(n_in, n_hidden, n_out):\r\n    net = dict()\r\n    net['input'] = lasagne.layers.InputLayer((None, n_in))\r\n    net['fc'] = lasagne.layers.DenseLayer(\r\n        net['input'],\r\n        num_units=n_hidden,\r\n        nonlinearity=lasagne.nonlinearities.tanh)\r\n    net['output'] = lasagne.layers.DenseLayer(\r\n        net['fc'],\r\n        num_units=n_out,\r\n        nonlinearity=lasagne.nonlinearities.softmax)\r\n    return net\r\n\r\n\r\ndef get_softmax_model(n_in, n_out):\r\n    net = dict()\r\n    net['input'] = lasagne.layers.InputLayer((None, n_in))\r\n    net['output'] = lasagne.layers.DenseLayer(\r\n        net['input'],\r\n        num_units=n_out,\r\n        nonlinearity=lasagne.nonlinearities.softmax)\r\n    return net\r\n\r\n\r\ndef train(dataset, n_hidden=50, batch_size=100, epochs=100, learning_rate=0.01, model='nn', l2_ratio=1e-7,\r\n          rtn_layer=True):\r\n    train_x, train_y, test_x, test_y = dataset\r\n    n_in = train_x.shape[1]\r\n    n_out = 
len(np.unique(train_y))\r\n\r\n    if batch_size > len(train_y):\r\n        batch_size = len(train_y)\r\n\r\n    print 'Building model with {} training data, {} classes...'.format(len(train_x), n_out)\r\n    input_var = T.matrix('x')\r\n    target_var = T.ivector('y')\r\n    if model == 'nn':\r\n        print 'Using neural network...'\r\n        net = get_nn_model(n_in, n_hidden, n_out)\r\n    else:\r\n        print 'Using softmax regression...'\r\n        net = get_softmax_model(n_in, n_out)\r\n\r\n    net['input'].input_var = input_var\r\n    output_layer = net['output']\r\n\r\n    # create loss function\r\n    prediction = lasagne.layers.get_output(output_layer)\r\n    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)\r\n    loss = loss.mean() + l2_ratio * lasagne.regularization.regularize_network_params(output_layer,\r\n                                                                                 lasagne.regularization.l2)\r\n    # create parameter update expressions\r\n    params = lasagne.layers.get_all_params(output_layer, trainable=True)\r\n    updates = lasagne.updates.adam(loss, params, learning_rate=learning_rate)\r\n    train_fn = theano.function([input_var, target_var], loss, updates=updates)\r\n    # use trained network for predictions\r\n    test_prediction = lasagne.layers.get_output(output_layer, deterministic=True)\r\n    test_fn = theano.function([input_var], test_prediction)\r\n\r\n    print 'Training...'\r\n    for epoch in range(epochs):\r\n        loss = 0\r\n        for input_batch, target_batch in iterate_minibatches(train_x, train_y, batch_size):\r\n            loss += train_fn(input_batch, target_batch)\r\n        loss = round(loss, 3)\r\n        print 'Epoch {}, train loss {}'.format(epoch, loss)\r\n\r\n    pred_y = []\r\n    for input_batch, _ in iterate_minibatches(train_x, train_y, batch_size, shuffle=False):\r\n        pred = test_fn(input_batch)\r\n        pred_y.append(np.argmax(pred, axis=1))\r\n    
pred_y = np.concatenate(pred_y)\r\n\r\n    print 'Training Accuracy: {}'.format(accuracy_score(train_y, pred_y))\r\n    print classification_report(train_y, pred_y)\r\n\r\n    if test_x is not None:\r\n        print 'Testing...'\r\n        pred_y = []\r\n\r\n        if batch_size > len(test_y):\r\n            batch_size = len(test_y)\r\n\r\n        for input_batch, _ in iterate_minibatches(test_x, test_y, batch_size, shuffle=False):\r\n            pred = test_fn(input_batch)\r\n            pred_y.append(np.argmax(pred, axis=1))\r\n        pred_y = np.concatenate(pred_y)\r\n        print 'Testing Accuracy: {}'.format(accuracy_score(test_y, pred_y))\r\n        print classification_report(test_y, pred_y)\r\n\r\n    # return the query function\r\n    if rtn_layer:\r\n        return output_layer\r\n    else:\r\n        return pred_y\r\n\r\n\r\ndef load_dataset(train_feat, train_label, test_feat=None, test_label=None):\r\n    train_x = np.genfromtxt(train_feat, delimiter=',', dtype='float32')\r\n    train_y = np.genfromtxt(train_label, dtype='int32')\r\n    min_y = np.min(train_y)\r\n    train_y -= min_y\r\n    if test_feat is not None and test_label is not None:\r\n        test_x = np.genfromtxt(test_feat, delimiter=',', dtype='float32')\r\n        test_y = np.genfromtxt(test_label, dtype='int32')\r\n        test_y -= min_y\r\n    else:\r\n        test_x = None\r\n        test_y = None\r\n    return train_x, train_y, test_x, test_y\r\n\r\n\r\ndef main():\r\n    parser = argparse.ArgumentParser()\r\n    parser.add_argument('train_feat', type=str)\r\n    parser.add_argument('train_label', type=str)\r\n    parser.add_argument('--test_feat', type=str, default=None)\r\n    parser.add_argument('--test_label', type=str, default=None)\r\n    parser.add_argument('--model', type=str, default='nn')\r\n    parser.add_argument('--learning_rate', type=float, default=0.01)\r\n    parser.add_argument('--batch_size', type=int, default=100)\r\n    parser.add_argument('--n_hidden', 
type=int, default=50)\r\n    parser.add_argument('--epochs', type=int, default=100)\r\n    args = parser.parse_args()\r\n    print vars(args)\r\n    dataset = load_dataset(args.train_feat, args.train_label, args.test_feat, args.test_label)\r\n    train(dataset,\r\n          model=args.model,\r\n          learning_rate=args.learning_rate,\r\n          batch_size=args.batch_size,\r\n          n_hidden=args.n_hidden,\r\n          epochs=args.epochs)\r\n\r\n\r\nif __name__ == '__main__':\r\n    main()\r\n"
  }
]