Repository: xiaxin1998/DHCN
Branch: main
Commit: b39ec892f8be
Files: 4
Total size: 18.4 KB

Directory structure:
gitextract_ga75zb8t/
├── README.md
├── main.py
├── model.py
└── util.py

================================================
FILE CONTENTS
================================================

================================================
FILE: README.md
================================================
# DHCN

Code for the AAAI 2021 paper 'Self-Supervised Hypergraph Convolutional Networks for Session-based Recommendation'.

### The latest version of our paper is available at:

https://ojs.aaai.org/index.php/AAAI/article/view/16578

Environment: Python 3, PyTorch 1.6.0, NumPy 1.18.1, numba

Datasets are available on Dropbox: https://www.dropbox.com/sh/j12um64gsig5wqk/AAD4Vov6hUGwbLoVxh3wASg_a?dl=0 The datasets are already preprocessed and serialized with pickle.

For Diginetica, the best beta value is 0.01; for Tmall, the best beta value is 0.02.

You may encounter a cudaError in line 50 or line 74 when running our code if your numpy and pytorch versions differ from ours. We have not yet found a solution to this version problem. If you run into it, please try changing your numpy and pytorch versions to match ours.

================================================ FILE: main.py ================================================ import argparse import pickle import time from util import Data, split_validation from model import * import os parser = argparse.ArgumentParser() parser.add_argument('--dataset', default='sample', help='dataset name: diginetica/Nowplaying/sample') parser.add_argument('--epoch', type=int, default=30, help='number of epochs to train for') parser.add_argument('--batchSize', type=int, default=100, help='input batch size') parser.add_argument('--embSize', type=int, default=100, help='embedding size') parser.add_argument('--l2', type=float, default=1e-5, help='l2 penalty') parser.add_argument('--lr', type=float, default=0.001, help='learning rate') parser.add_argument('--layer', type=float, default=3, help='the number of layer used') parser.add_argument('--beta', type=float, default=0.01, help='ssl task maginitude') parser.add_argument('--filter', type=bool, default=False, help='filter incidence matrix') opt = parser.parse_args() print(opt) # os.environ['CUDA_VISIBLE_DEVICES'] = '0,1' # torch.cuda.set_device(1) def main(): train_data = pickle.load(open('../datasets/' + opt.dataset + '/train.txt', 'rb')) test_data = pickle.load(open('../datasets/' + opt.dataset + '/test.txt', 'rb')) if opt.dataset == 'diginetica': n_node = 43097 elif opt.dataset == 'Tmall': n_node = 40727 elif opt.dataset == 'Nowplaying': n_node = 60416 else: n_node = 309 train_data = Data(train_data, shuffle=True, n_node=n_node) test_data = Data(test_data, shuffle=True, n_node=n_node) model = trans_to_cuda(DHCN(adjacency=train_data.adjacency,n_node=n_node,lr=opt.lr, l2=opt.l2, beta=opt.beta, layers=opt.layer,emb_size=opt.embSize, batch_size=opt.batchSize,dataset=opt.dataset)) top_K = [5, 10, 20] best_results = {} for K in top_K: best_results['epoch%d' % K] = [0, 0] best_results['metric%d' % K] = [0, 0] for epoch in range(opt.epoch): 
print('-------------------------------------------------------') print('epoch: ', epoch) metrics, total_loss = train_test(model, train_data, test_data) for K in top_K: metrics['hit%d' % K] = np.mean(metrics['hit%d' % K]) * 100 metrics['mrr%d' % K] = np.mean(metrics['mrr%d' % K]) * 100 if best_results['metric%d' % K][0] < metrics['hit%d' % K]: best_results['metric%d' % K][0] = metrics['hit%d' % K] best_results['epoch%d' % K][0] = epoch if best_results['metric%d' % K][1] < metrics['mrr%d' % K]: best_results['metric%d' % K][1] = metrics['mrr%d' % K] best_results['epoch%d' % K][1] = epoch print(metrics) for K in top_K: print('train_loss:\t%.4f\tRecall@%d: %.4f\tMRR%d: %.4f\tEpoch: %d, %d' % (total_loss, K, best_results['metric%d' % K][0], K, best_results['metric%d' % K][1], best_results['epoch%d' % K][0], best_results['epoch%d' % K][1])) if __name__ == '__main__': main() ================================================ FILE: model.py ================================================ import datetime import math import numpy as np import torch from torch import nn, backends from torch.nn import Module, Parameter import torch.nn.functional as F import torch.sparse from scipy.sparse import coo import time from numba import jit import heapq def trans_to_cuda(variable): if torch.cuda.is_available(): return variable.cuda() else: return variable def trans_to_cpu(variable): if torch.cuda.is_available(): return variable.cpu() else: return variable class HyperConv(Module): def __init__(self, layers,dataset,emb_size=100): super(HyperConv, self).__init__() self.emb_size = emb_size self.layers = layers self.dataset = dataset def forward(self, adjacency, embedding): item_embeddings = embedding item_embedding_layer0 = item_embeddings final = [item_embedding_layer0] for i in range(self.layers): item_embeddings = torch.sparse.mm(trans_to_cuda(adjacency), item_embeddings) final.append(item_embeddings) # final1 = trans_to_cuda(torch.tensor([item.cpu().detach().numpy() for item in final])) 
# item_embeddings = torch.sum(final1, 0) item_embeddings = np.sum(final, 0) / (self.layers+1) return item_embeddings class LineConv(Module): def __init__(self, layers,batch_size,emb_size=100): super(LineConv, self).__init__() self.emb_size = emb_size self.batch_size = batch_size self.layers = layers def forward(self, item_embedding, D, A, session_item, session_len): zeros = torch.cuda.FloatTensor(1,self.emb_size).fill_(0) # zeros = torch.zeros([1,self.emb_size]) item_embedding = torch.cat([zeros, item_embedding], 0) seq_h = [] for i in torch.arange(len(session_item)): seq_h.append(torch.index_select(item_embedding, 0, session_item[i])) seq_h1 = trans_to_cuda(torch.tensor([item.cpu().detach().numpy() for item in seq_h])) session_emb_lgcn = torch.div(torch.sum(seq_h1, 1), session_len) session = [session_emb_lgcn] DA = torch.mm(D, A).float() for i in range(self.layers): session_emb_lgcn = torch.mm(DA, session_emb_lgcn) session.append(session_emb_lgcn) #session1 = trans_to_cuda(torch.tensor([item.cpu().detach().numpy() for item in session])) #session_emb_lgcn = torch.sum(session1, 0) session_emb_lgcn = np.sum(session, 0)/ (self.layers+1) return session_emb_lgcn class DHCN(Module): def __init__(self, adjacency, n_node,lr, layers,l2, beta,dataset,emb_size=100, batch_size=100): super(DHCN, self).__init__() self.emb_size = emb_size self.batch_size = batch_size self.n_node = n_node self.L2 = l2 self.lr = lr self.layers = layers self.beta = beta self.dataset = dataset values = adjacency.data indices = np.vstack((adjacency.row, adjacency.col)) if dataset == 'Nowplaying': index_fliter = (values < 0.05).nonzero() values = np.delete(values, index_fliter) indices1 = np.delete(indices[0], index_fliter) indices2 = np.delete(indices[1], index_fliter) indices = [indices1, indices2] i = torch.LongTensor(indices) v = torch.FloatTensor(values) shape = adjacency.shape adjacency = torch.sparse.FloatTensor(i, v, torch.Size(shape)) self.adjacency = adjacency self.embedding = 
nn.Embedding(self.n_node, self.emb_size) self.pos_embedding = nn.Embedding(200, self.emb_size) self.HyperGraph = HyperConv(self.layers,dataset) self.LineGraph = LineConv(self.layers, self.batch_size) self.w_1 = nn.Linear(2 * self.emb_size, self.emb_size) self.w_2 = nn.Parameter(torch.Tensor(self.emb_size, 1)) self.glu1 = nn.Linear(self.emb_size, self.emb_size) self.glu2 = nn.Linear(self.emb_size, self.emb_size, bias=False) self.loss_function = nn.CrossEntropyLoss() self.optimizer = torch.optim.Adam(self.parameters(), lr=self.lr) self.init_parameters() def init_parameters(self): stdv = 1.0 / math.sqrt(self.emb_size) for weight in self.parameters(): weight.data.uniform_(-stdv, stdv) def generate_sess_emb(self,item_embedding, session_item, session_len, reversed_sess_item, mask): zeros = torch.cuda.FloatTensor(1, self.emb_size).fill_(0) # zeros = torch.zeros(1, self.emb_size) item_embedding = torch.cat([zeros, item_embedding], 0) get = lambda i: item_embedding[reversed_sess_item[i]] seq_h = torch.cuda.FloatTensor(self.batch_size, list(reversed_sess_item.shape)[1], self.emb_size).fill_(0) # seq_h = torch.zeros(self.batch_size, list(reversed_sess_item.shape)[1], self.emb_size) for i in torch.arange(session_item.shape[0]): seq_h[i] = get(i) hs = torch.div(torch.sum(seq_h, 1), session_len) mask = mask.float().unsqueeze(-1) len = seq_h.shape[1] pos_emb = self.pos_embedding.weight[:len] pos_emb = pos_emb.unsqueeze(0).repeat(self.batch_size, 1, 1) hs = hs.unsqueeze(-2).repeat(1, len, 1) nh = self.w_1(torch.cat([pos_emb, seq_h], -1)) nh = torch.tanh(nh) nh = torch.sigmoid(self.glu1(nh) + self.glu2(hs)) beta = torch.matmul(nh, self.w_2) beta = beta * mask select = torch.sum(beta * seq_h, 1) return select def generate_sess_emb_npos(self,item_embedding, session_item, session_len, reversed_sess_item, mask): zeros = torch.cuda.FloatTensor(1, self.emb_size).fill_(0) # zeros = torch.zeros(1, self.emb_size) item_embedding = torch.cat([zeros, item_embedding], 0) get = lambda i: 
item_embedding[reversed_sess_item[i]] seq_h = torch.cuda.FloatTensor(self.batch_size, list(reversed_sess_item.shape)[1], self.emb_size).fill_(0) # seq_h = torch.zeros(self.batch_size, list(reversed_sess_item.shape)[1], self.emb_size) for i in torch.arange(session_item.shape[0]): seq_h[i] = get(i) hs = torch.div(torch.sum(seq_h, 1), session_len) mask = mask.float().unsqueeze(-1) len = seq_h.shape[1] # pos_emb = self.pos_embedding.weight[:len] # pos_emb = pos_emb.unsqueeze(0).repeat(self.batch_size, 1, 1) hs = hs.unsqueeze(-2).repeat(1, len, 1) nh = seq_h nh = torch.tanh(nh) nh = torch.sigmoid(self.glu1(nh) + self.glu2(hs)) beta = torch.matmul(nh, self.w_2) beta = beta * mask select = torch.sum(beta * seq_h, 1) return select def SSL(self, sess_emb_hgnn, sess_emb_lgcn): def row_shuffle(embedding): corrupted_embedding = embedding[torch.randperm(embedding.size()[0])] return corrupted_embedding def row_column_shuffle(embedding): corrupted_embedding = embedding[torch.randperm(embedding.size()[0])] corrupted_embedding = corrupted_embedding[:,torch.randperm(corrupted_embedding.size()[1])] return corrupted_embedding def score(x1, x2): return torch.sum(torch.mul(x1, x2), 1) pos = score(sess_emb_hgnn, sess_emb_lgcn) neg1 = score(sess_emb_lgcn, row_column_shuffle(sess_emb_hgnn)) one = torch.cuda.FloatTensor(neg1.shape[0]).fill_(1) # one = zeros = torch.ones(neg1.shape[0]) con_loss = torch.sum(-torch.log(1e-8 + torch.sigmoid(pos))-torch.log(1e-8 + (one - torch.sigmoid(neg1)))) return con_loss def forward(self, session_item, session_len, D, A, reversed_sess_item, mask): item_embeddings_hg = self.HyperGraph(self.adjacency, self.embedding.weight) if self.dataset == 'Tmall': sess_emb_hgnn = self.generate_sess_emb_npos(item_embeddings_hg, session_item, session_len, reversed_sess_item, mask) else: sess_emb_hgnn = self.generate_sess_emb(item_embeddings_hg, session_item, session_len, reversed_sess_item, mask) session_emb_lg = self.LineGraph(self.embedding.weight, D, A, session_item, 
session_len) con_loss = self.SSL(sess_emb_hgnn, session_emb_lg) return item_embeddings_hg, sess_emb_hgnn, self.beta*con_loss @jit(nopython=True) def find_k_largest(K, candidates): n_candidates = [] for iid, score in enumerate(candidates[:K]): n_candidates.append((score, iid)) heapq.heapify(n_candidates) for iid, score in enumerate(candidates[K:]): if score > n_candidates[0][0]: heapq.heapreplace(n_candidates, (score, iid + K)) n_candidates.sort(key=lambda d: d[0], reverse=True) ids = [item[1] for item in n_candidates] # k_largest_scores = [item[0] for item in n_candidates] return ids#, k_largest_scores def forward(model, i, data): tar, session_len, session_item, reversed_sess_item, mask = data.get_slice(i) A_hat, D_hat = data.get_overlap(session_item) session_item = trans_to_cuda(torch.Tensor(session_item).long()) session_len = trans_to_cuda(torch.Tensor(session_len).long()) A_hat = trans_to_cuda(torch.Tensor(A_hat)) D_hat = trans_to_cuda(torch.Tensor(D_hat)) tar = trans_to_cuda(torch.Tensor(tar).long()) mask = trans_to_cuda(torch.Tensor(mask).long()) reversed_sess_item = trans_to_cuda(torch.Tensor(reversed_sess_item).long()) item_emb_hg, sess_emb_hgnn, con_loss = model(session_item, session_len, D_hat, A_hat, reversed_sess_item, mask) scores = torch.mm(sess_emb_hgnn, torch.transpose(item_emb_hg, 1,0)) return tar, scores, con_loss def train_test(model, train_data, test_data): print('start training: ', datetime.datetime.now()) torch.autograd.set_detect_anomaly(True) total_loss = 0.0 slices = train_data.generate_batch(model.batch_size) for i in slices: model.zero_grad() targets, scores, con_loss = forward(model, i, train_data) loss = model.loss_function(scores + 1e-8, targets) loss = loss + con_loss loss.backward() # print(loss.item()) model.optimizer.step() total_loss += loss print('\tLoss:\t%.3f' % total_loss) top_K = [5, 10, 20] metrics = {} for K in top_K: metrics['hit%d' % K] = [] metrics['mrr%d' % K] = [] print('start predicting: ', datetime.datetime.now()) 
model.eval() slices = test_data.generate_batch(model.batch_size) for i in slices: tar, scores, con_loss = forward(model, i, test_data) scores = trans_to_cpu(scores).detach().numpy() index = [] for idd in range(model.batch_size): index.append(find_k_largest(20, scores[idd])) index = np.array(index) tar = trans_to_cpu(tar).detach().numpy() for K in top_K: for prediction, target in zip(index[:, :K], tar): metrics['hit%d' %K].append(np.isin(target, prediction)) if len(np.where(prediction == target)[0]) == 0: metrics['mrr%d' %K].append(0) else: metrics['mrr%d' %K].append(1 / (np.where(prediction == target)[0][0]+1)) return metrics, total_loss ================================================ FILE: util.py ================================================ import numpy as np from scipy.sparse import csr_matrix from operator import itemgetter def data_masks(all_sessions, n_node): indptr, indices, data = [], [], [] indptr.append(0) for j in range(len(all_sessions)): session = np.unique(all_sessions[j]) length = len(session) s = indptr[-1] indptr.append((s + length)) for i in range(length): indices.append(session[i]-1) data.append(1) matrix = csr_matrix((data, indices, indptr), shape=(len(all_sessions), n_node)) return matrix def split_validation(train_set, valid_portion): train_set_x, train_set_y = train_set n_samples = len(train_set_x) sidx = np.arange(n_samples, dtype='int32') np.random.shuffle(sidx) n_train = int(np.round(n_samples * (1. 
- valid_portion))) valid_set_x = [train_set_x[s] for s in sidx[n_train:]] valid_set_y = [train_set_y[s] for s in sidx[n_train:]] train_set_x = [train_set_x[s] for s in sidx[:n_train]] train_set_y = [train_set_y[s] for s in sidx[:n_train]] return (train_set_x, train_set_y), (valid_set_x, valid_set_y) class Data(): def __init__(self, data, shuffle=False, n_node=None): self.raw = np.asarray(data[0]) H_T = data_masks(self.raw, n_node) BH_T = H_T.T.multiply(1.0/H_T.sum(axis=1).reshape(1, -1)) BH_T = BH_T.T H = H_T.T DH = H.T.multiply(1.0/H.sum(axis=1).reshape(1, -1)) DH = DH.T DHBH_T = np.dot(DH,BH_T) self.adjacency = DHBH_T.tocoo() self.n_node = n_node self.targets = np.asarray(data[1]) self.length = len(self.raw) self.shuffle = shuffle def get_overlap(self, sessions): matrix = np.zeros((len(sessions), len(sessions))) for i in range(len(sessions)): seq_a = set(sessions[i]) seq_a.discard(0) for j in range(i+1, len(sessions)): seq_b = set(sessions[j]) seq_b.discard(0) overlap = seq_a.intersection(seq_b) ab_set = seq_a | seq_b matrix[i][j] = float(len(overlap))/float(len(ab_set)) matrix[j][i] = matrix[i][j] matrix = matrix + np.diag([1.0]*len(sessions)) degree = np.sum(np.array(matrix), 1) degree = np.diag(1.0/degree) return matrix, degree def generate_batch(self, batch_size): if self.shuffle: shuffled_arg = np.arange(self.length) np.random.shuffle(shuffled_arg) self.raw = self.raw[shuffled_arg] self.targets = self.targets[shuffled_arg] n_batch = int(self.length / batch_size) if self.length % batch_size != 0: n_batch += 1 slices = np.split(np.arange(n_batch * batch_size), n_batch) slices[-1] = np.arange(self.length-batch_size, self.length) return slices def get_slice(self, index): items, num_node = [], [] inp = self.raw[index] for session in inp: num_node.append(len(np.nonzero(session)[0])) max_n_node = np.max(num_node) session_len = [] reversed_sess_item = [] mask = [] for session in inp: nonzero_elems = np.nonzero(session)[0] session_len.append([len(nonzero_elems)]) 
items.append(session + (max_n_node - len(nonzero_elems)) * [0]) mask.append([1]*len(nonzero_elems) + (max_n_node - len(nonzero_elems)) * [0]) reversed_sess_item.append(list(reversed(session)) + (max_n_node - len(nonzero_elems)) * [0]) return self.targets[index]-1, session_len,items, reversed_sess_item, mask