[
  {
    "path": "README.md",
    "content": "# DHCN\n\nCodes for AAAI 2021 paper 'Self-Supervised Hypergraph Convolutional Networks for Session-based Recommendation'.\n\n### The latest version of our paper is available at: \nhttps://ojs.aaai.org/index.php/AAAI/article/view/16578\n\nEnvironments: Python3, Pytorch 1.6.0, Numpy 1.18.1, numba\n\nDatasets are available at Dropbox: https://www.dropbox.com/sh/j12um64gsig5wqk/AAD4Vov6hUGwbLoVxh3wASg_a?dl=0 The datasets are already preprocessed and encoded by pickle.\n\nFor Diginetica, the best beta value is 0.01; for Tmall, the best beta value is 0.02.\n\nSome people may encounter a cudaError in line 50 or line 74 when running our codes if your numpy and pytorch version are different with ours. Currently, we haven't found the solution to resolve the version problem. If you have this problem, please try to change numpy and pytorch version same with ours.\n"
  },
  {
    "path": "main.py",
    "content": "import argparse\nimport pickle\nimport time\nfrom util import Data, split_validation\nfrom model import *\nimport os\n\n\nparser = argparse.ArgumentParser()\nparser.add_argument('--dataset', default='sample', help='dataset name: diginetica/Nowplaying/sample')\nparser.add_argument('--epoch', type=int, default=30, help='number of epochs to train for')\nparser.add_argument('--batchSize', type=int, default=100, help='input batch size')\nparser.add_argument('--embSize', type=int, default=100, help='embedding size')\nparser.add_argument('--l2', type=float, default=1e-5, help='l2 penalty')\nparser.add_argument('--lr', type=float, default=0.001, help='learning rate')\nparser.add_argument('--layer', type=float, default=3, help='the number of layer used')\nparser.add_argument('--beta', type=float, default=0.01, help='ssl task maginitude')\nparser.add_argument('--filter', type=bool, default=False, help='filter incidence matrix')\n\nopt = parser.parse_args()\nprint(opt)\n# os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'\n# torch.cuda.set_device(1)\n\ndef main():\n    train_data = pickle.load(open('../datasets/' + opt.dataset + '/train.txt', 'rb'))\n    test_data = pickle.load(open('../datasets/' + opt.dataset + '/test.txt', 'rb'))\n\n    if opt.dataset == 'diginetica':\n        n_node = 43097\n    elif opt.dataset == 'Tmall':\n        n_node = 40727\n    elif opt.dataset == 'Nowplaying':\n        n_node = 60416\n    else:\n        n_node = 309\n    train_data = Data(train_data, shuffle=True, n_node=n_node)\n    test_data = Data(test_data, shuffle=True, n_node=n_node)\n    model = trans_to_cuda(DHCN(adjacency=train_data.adjacency,n_node=n_node,lr=opt.lr, l2=opt.l2, beta=opt.beta, layers=opt.layer,emb_size=opt.embSize, batch_size=opt.batchSize,dataset=opt.dataset))\n\n    top_K = [5, 10, 20]\n    best_results = {}\n    for K in top_K:\n        best_results['epoch%d' % K] = [0, 0]\n        best_results['metric%d' % K] = [0, 0]\n\n    for epoch in range(opt.epoch):\n        print('-------------------------------------------------------')\n        print('epoch: ', epoch)\n        metrics, total_loss = train_test(model, train_data, test_data)\n        for K in top_K:\n            metrics['hit%d' % K] = np.mean(metrics['hit%d' % K]) * 100\n            metrics['mrr%d' % K] = np.mean(metrics['mrr%d' % K]) * 100\n            if best_results['metric%d' % K][0] < metrics['hit%d' % K]:\n                best_results['metric%d' % K][0] = metrics['hit%d' % K]\n                best_results['epoch%d' % K][0] = epoch\n            if best_results['metric%d' % K][1] < metrics['mrr%d' % K]:\n                best_results['metric%d' % K][1] = metrics['mrr%d' % K]\n                best_results['epoch%d' % K][1] = epoch\n        print(metrics)\n        for K in top_K:\n            print('train_loss:\\t%.4f\\tRecall@%d: %.4f\\tMRR%d: %.4f\\tEpoch: %d,  %d' %\n                  (total_loss, K, best_results['metric%d' % K][0], K, best_results['metric%d' % K][1],\n                   best_results['epoch%d' % K][0], best_results['epoch%d' % K][1]))\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "model.py",
    "content": "import datetime\nimport math\nimport numpy as np\nimport torch\nfrom torch import nn, backends\nfrom torch.nn import Module, Parameter\nimport torch.nn.functional as F\nimport torch.sparse\nfrom scipy.sparse import coo\nimport time\nfrom numba import jit\nimport heapq\n\ndef trans_to_cuda(variable):\n    if torch.cuda.is_available():\n        return variable.cuda()\n    else:\n        return variable\ndef trans_to_cpu(variable):\n    if torch.cuda.is_available():\n        return variable.cpu()\n    else:\n        return variable\n\nclass HyperConv(Module):\n    def __init__(self, layers,dataset,emb_size=100):\n        super(HyperConv, self).__init__()\n        self.emb_size = emb_size\n        self.layers = layers\n        self.dataset = dataset\n\n    def forward(self, adjacency, embedding):\n        item_embeddings = embedding\n        item_embedding_layer0 = item_embeddings\n        final = [item_embedding_layer0]\n        for i in range(self.layers):\n            item_embeddings = torch.sparse.mm(trans_to_cuda(adjacency), item_embeddings)\n            final.append(item_embeddings)\n      #  final1 = trans_to_cuda(torch.tensor([item.cpu().detach().numpy() for item in final]))\n      #  item_embeddings = torch.sum(final1, 0)\n        item_embeddings = np.sum(final, 0) / (self.layers+1)\n        return item_embeddings\n\n\nclass LineConv(Module):\n    def __init__(self, layers,batch_size,emb_size=100):\n        super(LineConv, self).__init__()\n        self.emb_size = emb_size\n        self.batch_size = batch_size\n        self.layers = layers\n    def forward(self, item_embedding, D, A, session_item, session_len):\n        zeros = torch.cuda.FloatTensor(1,self.emb_size).fill_(0)\n        # zeros = torch.zeros([1,self.emb_size])\n        item_embedding = torch.cat([zeros, item_embedding], 0)\n        seq_h = []\n        for i in torch.arange(len(session_item)):\n            seq_h.append(torch.index_select(item_embedding, 0, session_item[i]))\n        seq_h1 = trans_to_cuda(torch.tensor([item.cpu().detach().numpy() for item in seq_h]))\n        session_emb_lgcn = torch.div(torch.sum(seq_h1, 1), session_len)\n        session = [session_emb_lgcn]\n        DA = torch.mm(D, A).float()\n        for i in range(self.layers):\n            session_emb_lgcn = torch.mm(DA, session_emb_lgcn)\n            session.append(session_emb_lgcn)\n        #session1 = trans_to_cuda(torch.tensor([item.cpu().detach().numpy() for item in session]))\n        #session_emb_lgcn = torch.sum(session1, 0)\n        session_emb_lgcn = np.sum(session, 0)/ (self.layers+1)\n        return session_emb_lgcn\n\n\nclass DHCN(Module):\n    def __init__(self, adjacency, n_node,lr, layers,l2, beta,dataset,emb_size=100, batch_size=100):\n        super(DHCN, self).__init__()\n        self.emb_size = emb_size\n        self.batch_size = batch_size\n        self.n_node = n_node\n        self.L2 = l2\n        self.lr = lr\n        self.layers = layers\n        self.beta = beta\n        self.dataset = dataset\n\n        values = adjacency.data\n        indices = np.vstack((adjacency.row, adjacency.col))\n        if dataset == 'Nowplaying':\n            index_fliter = (values < 0.05).nonzero()\n            values = np.delete(values, index_fliter)\n            indices1 = np.delete(indices[0], index_fliter)\n            indices2 = np.delete(indices[1], index_fliter)\n            indices = [indices1, indices2]\n        i = torch.LongTensor(indices)\n        v = torch.FloatTensor(values)\n        shape = adjacency.shape\n        adjacency = torch.sparse.FloatTensor(i, v, torch.Size(shape))\n        self.adjacency = adjacency\n        self.embedding = nn.Embedding(self.n_node, self.emb_size)\n        self.pos_embedding = nn.Embedding(200, self.emb_size)\n        self.HyperGraph = HyperConv(self.layers,dataset)\n        self.LineGraph = LineConv(self.layers, self.batch_size)\n        self.w_1 = nn.Linear(2 * self.emb_size, self.emb_size)\n        self.w_2 = nn.Parameter(torch.Tensor(self.emb_size, 1))\n        self.glu1 = nn.Linear(self.emb_size, self.emb_size)\n        self.glu2 = nn.Linear(self.emb_size, self.emb_size, bias=False)\n        self.loss_function = nn.CrossEntropyLoss()\n        self.optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)\n        self.init_parameters()\n\n    def init_parameters(self):\n        stdv = 1.0 / math.sqrt(self.emb_size)\n        for weight in self.parameters():\n            weight.data.uniform_(-stdv, stdv)\n\n     \n    def generate_sess_emb(self,item_embedding, session_item, session_len, reversed_sess_item, mask):\n        zeros = torch.cuda.FloatTensor(1, self.emb_size).fill_(0)\n        # zeros = torch.zeros(1, self.emb_size)\n        item_embedding = torch.cat([zeros, item_embedding], 0)\n        get = lambda i: item_embedding[reversed_sess_item[i]]\n        seq_h = torch.cuda.FloatTensor(self.batch_size, list(reversed_sess_item.shape)[1], self.emb_size).fill_(0)\n        # seq_h = torch.zeros(self.batch_size, list(reversed_sess_item.shape)[1], self.emb_size)\n        for i in torch.arange(session_item.shape[0]):\n            seq_h[i] = get(i)\n        hs = torch.div(torch.sum(seq_h, 1), session_len)\n        mask = mask.float().unsqueeze(-1)\n        len = seq_h.shape[1]\n        pos_emb = self.pos_embedding.weight[:len]\n        pos_emb = pos_emb.unsqueeze(0).repeat(self.batch_size, 1, 1)\n\n        hs = hs.unsqueeze(-2).repeat(1, len, 1)\n        nh = self.w_1(torch.cat([pos_emb, seq_h], -1))\n        nh = torch.tanh(nh)\n        nh = torch.sigmoid(self.glu1(nh) + self.glu2(hs))\n        beta = torch.matmul(nh, self.w_2)\n        beta = beta * mask\n        select = torch.sum(beta * seq_h, 1)\n        return select\n\n    def generate_sess_emb_npos(self,item_embedding, session_item, session_len, reversed_sess_item, mask):\n        zeros = torch.cuda.FloatTensor(1, self.emb_size).fill_(0)\n        # zeros = torch.zeros(1, self.emb_size)\n        item_embedding = torch.cat([zeros, item_embedding], 0)\n        get = lambda i: item_embedding[reversed_sess_item[i]]\n        seq_h = torch.cuda.FloatTensor(self.batch_size, list(reversed_sess_item.shape)[1], self.emb_size).fill_(0)\n        # seq_h = torch.zeros(self.batch_size, list(reversed_sess_item.shape)[1], self.emb_size)\n        for i in torch.arange(session_item.shape[0]):\n            seq_h[i] = get(i)\n        hs = torch.div(torch.sum(seq_h, 1), session_len)\n        mask = mask.float().unsqueeze(-1)\n        len = seq_h.shape[1]\n        # pos_emb = self.pos_embedding.weight[:len]\n        # pos_emb = pos_emb.unsqueeze(0).repeat(self.batch_size, 1, 1)\n\n        hs = hs.unsqueeze(-2).repeat(1, len, 1)\n        nh = seq_h\n        nh = torch.tanh(nh)\n        nh = torch.sigmoid(self.glu1(nh) + self.glu2(hs))\n        beta = torch.matmul(nh, self.w_2)\n        beta = beta * mask\n        select = torch.sum(beta * seq_h, 1)\n        return select\n\n    def SSL(self, sess_emb_hgnn, sess_emb_lgcn):\n        def row_shuffle(embedding):\n            corrupted_embedding = embedding[torch.randperm(embedding.size()[0])]\n            return corrupted_embedding\n        def row_column_shuffle(embedding):\n            corrupted_embedding = embedding[torch.randperm(embedding.size()[0])]\n            corrupted_embedding = corrupted_embedding[:,torch.randperm(corrupted_embedding.size()[1])]\n            return corrupted_embedding\n        def score(x1, x2):\n            return torch.sum(torch.mul(x1, x2), 1)\n\n        pos = score(sess_emb_hgnn, sess_emb_lgcn)\n        neg1 = score(sess_emb_lgcn, row_column_shuffle(sess_emb_hgnn))\n        one = torch.cuda.FloatTensor(neg1.shape[0]).fill_(1)\n        # one = zeros = torch.ones(neg1.shape[0])\n        con_loss = torch.sum(-torch.log(1e-8 + torch.sigmoid(pos))-torch.log(1e-8 + (one - torch.sigmoid(neg1))))\n        return con_loss\n\n    def forward(self, session_item, session_len, D, A, reversed_sess_item, mask):\n        item_embeddings_hg = self.HyperGraph(self.adjacency, self.embedding.weight)\n        if self.dataset == 'Tmall':\n            sess_emb_hgnn = self.generate_sess_emb_npos(item_embeddings_hg, session_item, session_len, reversed_sess_item, mask)\n        else:\n            sess_emb_hgnn = self.generate_sess_emb(item_embeddings_hg, session_item, session_len, reversed_sess_item, mask)\n        session_emb_lg = self.LineGraph(self.embedding.weight, D, A, session_item, session_len)\n        con_loss = self.SSL(sess_emb_hgnn, session_emb_lg)\n        return item_embeddings_hg, sess_emb_hgnn, self.beta*con_loss\n\n\n@jit(nopython=True)\ndef find_k_largest(K, candidates):\n    n_candidates = []\n    for iid, score in enumerate(candidates[:K]):\n        n_candidates.append((score, iid))\n    heapq.heapify(n_candidates)\n    for iid, score in enumerate(candidates[K:]):\n        if score > n_candidates[0][0]:\n            heapq.heapreplace(n_candidates, (score, iid + K))\n    n_candidates.sort(key=lambda d: d[0], reverse=True)\n    ids = [item[1] for item in n_candidates]\n    # k_largest_scores = [item[0] for item in n_candidates]\n    return ids#, k_largest_scores\n\ndef forward(model, i, data):\n    tar, session_len, session_item, reversed_sess_item, mask = data.get_slice(i)\n    A_hat, D_hat = data.get_overlap(session_item)\n    session_item = trans_to_cuda(torch.Tensor(session_item).long())\n    session_len = trans_to_cuda(torch.Tensor(session_len).long())\n    A_hat = trans_to_cuda(torch.Tensor(A_hat))\n    D_hat = trans_to_cuda(torch.Tensor(D_hat))\n    tar = trans_to_cuda(torch.Tensor(tar).long())\n    mask = trans_to_cuda(torch.Tensor(mask).long())\n    reversed_sess_item = trans_to_cuda(torch.Tensor(reversed_sess_item).long())\n    item_emb_hg, sess_emb_hgnn, con_loss = model(session_item, session_len, D_hat, A_hat, reversed_sess_item, mask)\n    scores = torch.mm(sess_emb_hgnn, torch.transpose(item_emb_hg, 1,0))\n    return tar, scores, con_loss\n\n\ndef train_test(model, train_data, test_data):\n    print('start training: ', datetime.datetime.now())\n    torch.autograd.set_detect_anomaly(True)\n    total_loss = 0.0\n    slices = train_data.generate_batch(model.batch_size)\n    for i in slices:\n        model.zero_grad()\n        targets, scores, con_loss = forward(model, i, train_data)\n        loss = model.loss_function(scores + 1e-8, targets)\n        loss = loss + con_loss\n        loss.backward()\n#        print(loss.item())\n        model.optimizer.step()\n        total_loss += loss\n    print('\\tLoss:\\t%.3f' % total_loss)\n    top_K = [5, 10, 20]\n    metrics = {}\n    for K in top_K:\n        metrics['hit%d' % K] = []\n        metrics['mrr%d' % K] = []\n    print('start predicting: ', datetime.datetime.now())\n\n    model.eval()\n    slices = test_data.generate_batch(model.batch_size)\n    for i in slices:\n        tar, scores, con_loss = forward(model, i, test_data)\n        scores = trans_to_cpu(scores).detach().numpy()\n        index = []\n        for idd in range(model.batch_size):\n            index.append(find_k_largest(20, scores[idd]))\n        index = np.array(index)\n        tar = trans_to_cpu(tar).detach().numpy()\n        for K in top_K:\n            for prediction, target in zip(index[:, :K], tar):\n                metrics['hit%d' %K].append(np.isin(target, prediction))\n                if len(np.where(prediction == target)[0]) == 0:\n                    metrics['mrr%d' %K].append(0)\n                else:\n                    metrics['mrr%d' %K].append(1 / (np.where(prediction == target)[0][0]+1))\n    return metrics, total_loss\n\n\n"
  },
  {
    "path": "util.py",
    "content": "import numpy as np\nfrom scipy.sparse import csr_matrix\nfrom operator import itemgetter\n\ndef data_masks(all_sessions, n_node):\n    indptr, indices, data = [], [], []\n    indptr.append(0)\n    for j in range(len(all_sessions)):\n        session = np.unique(all_sessions[j])\n        length = len(session)\n        s = indptr[-1]\n        indptr.append((s + length))\n        for i in range(length):\n            indices.append(session[i]-1)\n            data.append(1)\n    matrix = csr_matrix((data, indices, indptr), shape=(len(all_sessions), n_node))\n\n    return matrix\n\ndef split_validation(train_set, valid_portion):\n    train_set_x, train_set_y = train_set\n    n_samples = len(train_set_x)\n    sidx = np.arange(n_samples, dtype='int32')\n    np.random.shuffle(sidx)\n    n_train = int(np.round(n_samples * (1. - valid_portion)))\n    valid_set_x = [train_set_x[s] for s in sidx[n_train:]]\n    valid_set_y = [train_set_y[s] for s in sidx[n_train:]]\n    train_set_x = [train_set_x[s] for s in sidx[:n_train]]\n    train_set_y = [train_set_y[s] for s in sidx[:n_train]]\n\n    return (train_set_x, train_set_y), (valid_set_x, valid_set_y)\n\nclass Data():\n    def __init__(self, data, shuffle=False, n_node=None):\n        self.raw = np.asarray(data[0])\n        H_T = data_masks(self.raw, n_node)\n        BH_T = H_T.T.multiply(1.0/H_T.sum(axis=1).reshape(1, -1))\n        BH_T = BH_T.T\n        H = H_T.T\n        DH = H.T.multiply(1.0/H.sum(axis=1).reshape(1, -1))\n        DH = DH.T\n        DHBH_T = np.dot(DH,BH_T)\n\n        self.adjacency = DHBH_T.tocoo()\n        self.n_node = n_node\n        self.targets = np.asarray(data[1])\n        self.length = len(self.raw)\n        self.shuffle = shuffle\n\n    def get_overlap(self, sessions):\n        matrix = np.zeros((len(sessions), len(sessions)))\n        for i in range(len(sessions)):\n            seq_a = set(sessions[i])\n            seq_a.discard(0)\n            for j in range(i+1, len(sessions)):\n                seq_b = set(sessions[j])\n                seq_b.discard(0)\n                overlap = seq_a.intersection(seq_b)\n                ab_set = seq_a | seq_b\n                matrix[i][j] = float(len(overlap))/float(len(ab_set))\n                matrix[j][i] = matrix[i][j]\n        matrix = matrix + np.diag([1.0]*len(sessions))\n        degree = np.sum(np.array(matrix), 1)\n        degree = np.diag(1.0/degree)\n        return matrix, degree\n\n    def generate_batch(self, batch_size):\n        if self.shuffle:\n            shuffled_arg = np.arange(self.length)\n            np.random.shuffle(shuffled_arg)\n            self.raw = self.raw[shuffled_arg]\n            self.targets = self.targets[shuffled_arg]\n        n_batch = int(self.length / batch_size)\n        if self.length % batch_size != 0:\n            n_batch += 1\n        slices = np.split(np.arange(n_batch * batch_size), n_batch)\n        slices[-1] = np.arange(self.length-batch_size, self.length)\n        return slices\n\n    def get_slice(self, index):\n        items, num_node = [], []\n        inp = self.raw[index]\n        for session in inp:\n            num_node.append(len(np.nonzero(session)[0]))\n        max_n_node = np.max(num_node)\n        session_len = []\n        reversed_sess_item = []\n        mask = []\n        for session in inp:\n            nonzero_elems = np.nonzero(session)[0]\n            session_len.append([len(nonzero_elems)])\n            items.append(session + (max_n_node - len(nonzero_elems)) * [0])\n            mask.append([1]*len(nonzero_elems) + (max_n_node - len(nonzero_elems)) * [0])\n            reversed_sess_item.append(list(reversed(session)) + (max_n_node - len(nonzero_elems)) * [0])\n\n\n        return self.targets[index]-1, session_len,items, reversed_sess_item, mask\n"
  }
]