From 0b9df9d798430f1fc440c6fa8a8dca2a1350d8be Mon Sep 17 00:00:00 2001 From: "Hongzhi (Steve), Chen" Date: Wed, 28 Sep 2022 10:28:24 +0800 Subject: [PATCH] [Misc] Black auto fix. (#4652) Co-authored-by: Steve --- examples/pytorch/NGCF/NGCF/main.py | 115 ++-- examples/pytorch/NGCF/NGCF/model.py | 104 ++-- .../pytorch/NGCF/NGCF/utility/batch_test.py | 76 ++- examples/pytorch/NGCF/NGCF/utility/helper.py | 37 +- .../pytorch/NGCF/NGCF/utility/load_data.py | 82 +-- examples/pytorch/NGCF/NGCF/utility/metrics.py | 33 +- examples/pytorch/NGCF/NGCF/utility/parser.py | 109 ++-- examples/pytorch/P-GNN/main.py | 160 ++++-- examples/pytorch/P-GNN/model.py | 40 +- examples/pytorch/P-GNN/utils.py | 146 +++-- examples/pytorch/infograph/utils.py | 15 +- examples/pytorch/jknet/main.py | 123 ++-- examples/pytorch/jknet/model.py | 22 +- examples/pytorch/jtnn/jtnn/__init__.py | 6 +- examples/pytorch/jtnn/jtnn/chemutils.py | 264 ++++++--- examples/pytorch/jtnn/jtnn/datautils.py | 194 ++++--- examples/pytorch/jtnn/jtnn/jtnn_enc.py | 79 ++- examples/pytorch/jtnn/jtnn/jtnn_vae.py | 256 ++++++--- .../jtnn/jtnn/line_profiler_integration.py | 14 +- examples/pytorch/jtnn/jtnn/mol_tree.py | 15 +- examples/pytorch/jtnn/jtnn/mol_tree_nx.py | 106 ++-- examples/pytorch/jtnn/jtnn/mpn.py | 116 ++-- examples/pytorch/jtnn/jtnn/nnutils.py | 23 +- examples/pytorch/jtnn/vaetrain_dgl.py | 135 +++-- examples/pytorch/label_propagation/main.py | 44 +- examples/pytorch/lda/example_20newsgroups.py | 96 ++-- examples/pytorch/lda/lda_model.py | 268 +++++---- examples/pytorch/line_graph/train.py | 99 ++-- examples/pytorch/mixhop/main.py | 266 +++++---- .../model_zoo/citation_network/conf.py | 54 +- .../model_zoo/citation_network/models.py | 279 +++++---- .../pytorch/model_zoo/citation_network/run.py | 112 ++-- .../pytorch/model_zoo/geometric/coarsening.py | 27 +- .../pytorch/model_zoo/geometric/coordinate.py | 22 +- .../pytorch/model_zoo/geometric/grid_graph.py | 35 +- examples/pytorch/model_zoo/geometric/mnist.py | 143 +++-- examples/pytorch/monet/citation.py | 163 +++--- .../multigpu/multi_gpu_graph_prediction.py | 92 +-- .../multigpu/multi_gpu_node_classification.py | 197 +++++-- examples/pytorch/mvgrl/graph/dataset.py | 141 +++-- examples/pytorch/mvgrl/graph/main.py | 100 ++-- examples/pytorch/mvgrl/graph/model.py | 43 +- examples/pytorch/mvgrl/graph/utils.py | 29 +- examples/pytorch/mvgrl/node/dataset.py | 74 ++- examples/pytorch/mvgrl/node/main.py | 89 ++- examples/pytorch/mvgrl/node/main_sample.py | 116 ++-- examples/pytorch/mvgrl/node/model.py | 13 +- examples/pytorch/node2vec/main.py | 48 +- examples/pytorch/node2vec/model.py | 98 +++- examples/pytorch/node2vec/utils.py | 44 +- examples/pytorch/ogb/cluster-gat/main.py | 313 ++++++---- .../ogb/cluster-gat/partition_utils.py | 3 +- examples/pytorch/ogb/cluster-gat/sampler.py | 12 +- examples/pytorch/ogb/cluster-sage/main.py | 214 ++++--- .../ogb/cluster-sage/partition_utils.py | 3 +- examples/pytorch/ogb/cluster-sage/sampler.py | 20 +- examples/pytorch/ogb/deepwalk/deepwalk.py | 395 +++++++++---- examples/pytorch/ogb/deepwalk/load_dataset.py | 44 +- examples/pytorch/ogb/deepwalk/model.py | 261 +++++---- examples/pytorch/ogb/deepwalk/reading_data.py | 123 ++-- examples/pytorch/ogb/deepwalk/utils.py | 4 +- examples/pytorch/ogb/directional_GSN/main.py | 283 ++++++--- .../ogb/directional_GSN/preprocessing.py | 177 +++--- examples/pytorch/ogb/line/line.py | 388 +++++++++---- examples/pytorch/ogb/line/load_dataset.py | 43 +- examples/pytorch/ogb/line/model.py | 321 ++++++++--- 
examples/pytorch/ogb/line/reading_data.py | 119 ++-- examples/pytorch/ogb/line/utils.py | 8 +- examples/pytorch/ogb/ngnn/main.py | 346 +++++++---- .../ogb/ogbn-arxiv/correct_and_smooth.py | 50 +- examples/pytorch/ogb/ogbn-arxiv/gat.py | 197 +++++-- examples/pytorch/ogb/ogbn-arxiv/gcn.py | 161 +++++- examples/pytorch/ogb/ogbn-arxiv/models.py | 66 ++- examples/pytorch/ogb/ogbn-mag/hetero_rgcn.py | 271 ++++++--- examples/pytorch/ogb/ogbn-products/gat/gat.py | 307 +++++++--- .../pytorch/ogb/ogbn-products/gat/main.py | 227 +++++--- .../pytorch/ogb/ogbn-products/gat/models.py | 47 +- .../ogb/ogbn-products/graphsage/main.py | 182 ++++-- examples/pytorch/ogb/ogbn-products/mlp/mlp.py | 227 ++++++-- .../pytorch/ogb/ogbn-products/mlp/models.py | 10 +- .../pytorch/ogb/ogbn-proteins/configure.py | 58 +- examples/pytorch/ogb/ogbn-proteins/gat.py | 262 +++++++-- .../ogbn-proteins/main_proteins_full_dgl.py | 93 ++- examples/pytorch/ogb/ogbn-proteins/utils.py | 2 - examples/pytorch/ogb/seal_ogbl/main.py | 542 +++++++++++------- examples/pytorch/ogb/sign/dataset.py | 45 +- examples/pytorch/ogb/sign/sign.py | 161 ++++-- .../pytorch/ogb_lsc/MAG240M/preprocess.py | 183 ++++-- examples/pytorch/ogb_lsc/MAG240M/train.py | 264 ++++++--- .../ogb_lsc/MAG240M/train_multi_gpus.py | 303 +++++++--- examples/pytorch/ogb_lsc/PCQM4M/conv.py | 173 ++++-- examples/pytorch/ogb_lsc/PCQM4M/gnn.py | 72 ++- examples/pytorch/ogb_lsc/PCQM4M/main.py | 286 ++++++--- .../pytorch/ogb_lsc/PCQM4M/test_inference.py | 187 ++++-- examples/pytorch/pinsage/builder.py | 81 ++- examples/pytorch/pinsage/data_utils.py | 55 +- examples/pytorch/pinsage/evaluation.py | 62 +- examples/pytorch/pinsage/layers.py | 64 ++- examples/pytorch/pinsage/model_sparse.py | 136 +++-- 99 files changed, 8585 insertions(+), 4228 deletions(-) diff --git a/examples/pytorch/NGCF/NGCF/main.py b/examples/pytorch/NGCF/NGCF/main.py index e3aebffb18ee..935e43cf5bc4 100644 --- a/examples/pytorch/NGCF/NGCF/main.py +++ b/examples/pytorch/NGCF/NGCF/main.py @@ -1,23 +1,27 @@ +import os +from time import time + import torch import torch.optim as optim from model import NGCF from utility.batch_test import * from utility.helper import early_stopping -from time import time -import os + def main(args): # Step 1: Prepare graph data and device ================================================================= # if args.gpu >= 0 and torch.cuda.is_available(): - device = 'cuda:{}'.format(args.gpu) + device = "cuda:{}".format(args.gpu) else: - device = 'cpu' + device = "cpu" - g=data_generator.g - g=g.to(device) + g = data_generator.g + g = g.to(device) # Step 2: Create model and training components=========================================================== # - model = NGCF(g, args.embed_size, args.layer_size, args.mess_dropout, args.regs[0]).to(device) + model = NGCF( + g, args.embed_size, args.layer_size, args.mess_dropout, args.regs[0] + ).to(device) optimizer = optim.Adam(model.parameters(), lr=args.lr) # Step 3: training epoches ============================================================================== # @@ -27,16 +31,16 @@ def main(args): loss_loger, pre_loger, rec_loger, ndcg_loger, hit_loger = [], [], [], [], [] for epoch in range(args.epoch): t1 = time() - loss, mf_loss, emb_loss = 0., 0., 0. 
+ loss, mf_loss, emb_loss = 0.0, 0.0, 0.0 for idx in range(n_batch): users, pos_items, neg_items = data_generator.sample() - u_g_embeddings, pos_i_g_embeddings, neg_i_g_embeddings = model(g, 'user', 'item', users, - pos_items, - neg_items) + u_g_embeddings, pos_i_g_embeddings, neg_i_g_embeddings = model( + g, "user", "item", users, pos_items, neg_items + ) - batch_loss, batch_mf_loss, batch_emb_loss = model.create_bpr_loss(u_g_embeddings, - pos_i_g_embeddings, - neg_i_g_embeddings) + batch_loss, batch_mf_loss, batch_emb_loss = model.create_bpr_loss( + u_g_embeddings, pos_i_g_embeddings, neg_i_g_embeddings + ) optimizer.zero_grad() batch_loss.backward() optimizer.step() @@ -44,45 +48,72 @@ def main(args): loss += batch_loss mf_loss += batch_mf_loss emb_loss += batch_emb_loss - if (epoch + 1) % 10 != 0: if args.verbose > 0 and epoch % args.verbose == 0: - perf_str = 'Epoch %d [%.1fs]: train==[%.5f=%.5f + %.5f]' % ( - epoch, time() - t1, loss, mf_loss, emb_loss) + perf_str = "Epoch %d [%.1fs]: train==[%.5f=%.5f + %.5f]" % ( + epoch, + time() - t1, + loss, + mf_loss, + emb_loss, + ) print(perf_str) - continue #end the current epoch and move to the next epoch, let the following evaluation run every 10 epoches + continue # end the current epoch and move to the next epoch, let the following evaluation run every 10 epoches - #evaluate the model every 10 epoches + # evaluate the model every 10 epoches t2 = time() users_to_test = list(data_generator.test_set.keys()) ret = test(model, g, users_to_test) t3 = time() loss_loger.append(loss) - rec_loger.append(ret['recall']) - pre_loger.append(ret['precision']) - ndcg_loger.append(ret['ndcg']) - hit_loger.append(ret['hit_ratio']) + rec_loger.append(ret["recall"]) + pre_loger.append(ret["precision"]) + ndcg_loger.append(ret["ndcg"]) + hit_loger.append(ret["hit_ratio"]) if args.verbose > 0: - perf_str = 'Epoch %d [%.1fs + %.1fs]: train==[%.5f=%.5f + %.5f], recall=[%.5f, %.5f], ' \ - 'precision=[%.5f, %.5f], hit=[%.5f, %.5f], ndcg=[%.5f, %.5f]' % \ - (epoch, t2 - t1, t3 - t2, loss, mf_loss, emb_loss, ret['recall'][0], ret['recall'][-1], - ret['precision'][0], ret['precision'][-1], ret['hit_ratio'][0], ret['hit_ratio'][-1], - ret['ndcg'][0], ret['ndcg'][-1]) + perf_str = ( + "Epoch %d [%.1fs + %.1fs]: train==[%.5f=%.5f + %.5f], recall=[%.5f, %.5f], " + "precision=[%.5f, %.5f], hit=[%.5f, %.5f], ndcg=[%.5f, %.5f]" + % ( + epoch, + t2 - t1, + t3 - t2, + loss, + mf_loss, + emb_loss, + ret["recall"][0], + ret["recall"][-1], + ret["precision"][0], + ret["precision"][-1], + ret["hit_ratio"][0], + ret["hit_ratio"][-1], + ret["ndcg"][0], + ret["ndcg"][-1], + ) + ) print(perf_str) - cur_best_pre_0, stopping_step, should_stop = early_stopping(ret['recall'][0], cur_best_pre_0, - stopping_step, expected_order='acc', flag_step=5) + cur_best_pre_0, stopping_step, should_stop = early_stopping( + ret["recall"][0], + cur_best_pre_0, + stopping_step, + expected_order="acc", + flag_step=5, + ) # early stop if should_stop == True: break - if ret['recall'][0] == cur_best_pre_0 and args.save_flag == 1: + if ret["recall"][0] == cur_best_pre_0 and args.save_flag == 1: torch.save(model.state_dict(), args.weights_path + args.model_name) - print('save the weights in path: ', args.weights_path + args.model_name) + print( + "save the weights in path: ", + args.weights_path + args.model_name, + ) recs = np.array(rec_loger) pres = np.array(pre_loger) @@ -92,14 +123,21 @@ def main(args): best_rec_0 = max(recs[:, 0]) idx = list(recs[:, 0]).index(best_rec_0) - final_perf = "Best 
Iter=[%d]@[%.1f]\trecall=[%s], precision=[%s], hit=[%s], ndcg=[%s]" % \ - (idx, time() - t0, '\t'.join(['%.5f' % r for r in recs[idx]]), - '\t'.join(['%.5f' % r for r in pres[idx]]), - '\t'.join(['%.5f' % r for r in hit[idx]]), - '\t'.join(['%.5f' % r for r in ndcgs[idx]])) + final_perf = ( + "Best Iter=[%d]@[%.1f]\trecall=[%s], precision=[%s], hit=[%s], ndcg=[%s]" + % ( + idx, + time() - t0, + "\t".join(["%.5f" % r for r in recs[idx]]), + "\t".join(["%.5f" % r for r in pres[idx]]), + "\t".join(["%.5f" % r for r in hit[idx]]), + "\t".join(["%.5f" % r for r in ndcgs[idx]]), + ) + ) print(final_perf) -if __name__ == '__main__': + +if __name__ == "__main__": if not os.path.exists(args.weights_path): os.mkdir(args.weights_path) args.mess_dropout = eval(args.mess_dropout) @@ -107,4 +145,3 @@ def main(args): args.regs = eval(args.regs) print(args) main(args) - diff --git a/examples/pytorch/NGCF/NGCF/model.py b/examples/pytorch/NGCF/NGCF/model.py index 5dabfc032250..ce8736aa718d 100644 --- a/examples/pytorch/NGCF/NGCF/model.py +++ b/examples/pytorch/NGCF/NGCF/model.py @@ -1,58 +1,74 @@ import torch import torch.nn as nn import torch.nn.functional as F + import dgl.function as fn + class NGCFLayer(nn.Module): def __init__(self, in_size, out_size, norm_dict, dropout): super(NGCFLayer, self).__init__() self.in_size = in_size self.out_size = out_size - #weights for different types of messages - self.W1 = nn.Linear(in_size, out_size, bias = True) - self.W2 = nn.Linear(in_size, out_size, bias = True) + # weights for different types of messages + self.W1 = nn.Linear(in_size, out_size, bias=True) + self.W2 = nn.Linear(in_size, out_size, bias=True) - #leaky relu + # leaky relu self.leaky_relu = nn.LeakyReLU(0.2) - #dropout layer + # dropout layer self.dropout = nn.Dropout(dropout) - #initialization + # initialization torch.nn.init.xavier_uniform_(self.W1.weight) torch.nn.init.constant_(self.W1.bias, 0) torch.nn.init.xavier_uniform_(self.W2.weight) torch.nn.init.constant_(self.W2.bias, 0) - #norm + # norm self.norm_dict = norm_dict def forward(self, g, feat_dict): - funcs = {} #message and reduce functions dict - #for each type of edges, compute messages and reduce them all + funcs = {} # message and reduce functions dict + # for each type of edges, compute messages and reduce them all for srctype, etype, dsttype in g.canonical_etypes: - if srctype == dsttype: #for self loops + if srctype == dsttype: # for self loops messages = self.W1(feat_dict[srctype]) - g.nodes[srctype].data[etype] = messages #store in ndata - funcs[(srctype, etype, dsttype)] = (fn.copy_u(etype, 'm'), fn.sum('m', 'h')) #define message and reduce functions + g.nodes[srctype].data[etype] = messages # store in ndata + funcs[(srctype, etype, dsttype)] = ( + fn.copy_u(etype, "m"), + fn.sum("m", "h"), + ) # define message and reduce functions else: src, dst = g.edges(etype=(srctype, etype, dsttype)) norm = self.norm_dict[(srctype, etype, dsttype)] - messages = norm * (self.W1(feat_dict[srctype][src]) + self.W2(feat_dict[srctype][src]*feat_dict[dsttype][dst])) #compute messages - g.edges[(srctype, etype, dsttype)].data[etype] = messages #store in edata - funcs[(srctype, etype, dsttype)] = (fn.copy_e(etype, 'm'), fn.sum('m', 'h')) #define message and reduce functions - - g.multi_update_all(funcs, 'sum') #update all, reduce by first type-wisely then across different types - feature_dict={} + messages = norm * ( + self.W1(feat_dict[srctype][src]) + + self.W2(feat_dict[srctype][src] * feat_dict[dsttype][dst]) + ) # compute messages + 
g.edges[(srctype, etype, dsttype)].data[ + etype + ] = messages # store in edata + funcs[(srctype, etype, dsttype)] = ( + fn.copy_e(etype, "m"), + fn.sum("m", "h"), + ) # define message and reduce functions + + g.multi_update_all( + funcs, "sum" + ) # update all, reduce by first type-wisely then across different types + feature_dict = {} for ntype in g.ntypes: - h = self.leaky_relu(g.nodes[ntype].data['h']) #leaky relu - h = self.dropout(h) #dropout - h = F.normalize(h,dim=1,p=2) #l2 normalize + h = self.leaky_relu(g.nodes[ntype].data["h"]) # leaky relu + h = self.dropout(h) # dropout + h = F.normalize(h, dim=1, p=2) # l2 normalize feature_dict[ntype] = h return feature_dict + class NGCF(nn.Module): def __init__(self, g, in_size, layer_size, dropout, lmbd=1e-5): super(NGCF, self).__init__() @@ -60,9 +76,15 @@ def __init__(self, g, in_size, layer_size, dropout, lmbd=1e-5): self.norm_dict = dict() for srctype, etype, dsttype in g.canonical_etypes: src, dst = g.edges(etype=(srctype, etype, dsttype)) - dst_degree = g.in_degrees(dst, etype=(srctype, etype, dsttype)).float() #obtain degrees - src_degree = g.out_degrees(src, etype=(srctype, etype, dsttype)).float() - norm = torch.pow(src_degree * dst_degree, -0.5).unsqueeze(1) #compute norm + dst_degree = g.in_degrees( + dst, etype=(srctype, etype, dsttype) + ).float() # obtain degrees + src_degree = g.out_degrees( + src, etype=(srctype, etype, dsttype) + ).float() + norm = torch.pow(src_degree * dst_degree, -0.5).unsqueeze( + 1 + ) # compute norm self.norm_dict[(srctype, etype, dsttype)] = norm self.layers = nn.ModuleList() @@ -70,16 +92,26 @@ def __init__(self, g, in_size, layer_size, dropout, lmbd=1e-5): NGCFLayer(in_size, layer_size[0], self.norm_dict, dropout[0]) ) self.num_layers = len(layer_size) - for i in range(self.num_layers-1): + for i in range(self.num_layers - 1): self.layers.append( - NGCFLayer(layer_size[i], layer_size[i+1], self.norm_dict, dropout[i+1]) + NGCFLayer( + layer_size[i], + layer_size[i + 1], + self.norm_dict, + dropout[i + 1], + ) ) self.initializer = nn.init.xavier_uniform_ - #embeddings for different types of nodes - self.feature_dict = nn.ParameterDict({ - ntype: nn.Parameter(self.initializer(torch.empty(g.num_nodes(ntype), in_size))) for ntype in g.ntypes - }) + # embeddings for different types of nodes + self.feature_dict = nn.ParameterDict( + { + ntype: nn.Parameter( + self.initializer(torch.empty(g.num_nodes(ntype), in_size)) + ) + for ntype in g.ntypes + } + ) def create_bpr_loss(self, users, pos_items, neg_items): pos_scores = (users * pos_items).sum(1) @@ -88,7 +120,11 @@ def create_bpr_loss(self, users, pos_items, neg_items): mf_loss = nn.LogSigmoid()(pos_scores - neg_scores).mean() mf_loss = -1 * mf_loss - regularizer = (torch.norm(users) ** 2 + torch.norm(pos_items) ** 2 + torch.norm(neg_items) ** 2) / 2 + regularizer = ( + torch.norm(users) ** 2 + + torch.norm(pos_items) ** 2 + + torch.norm(neg_items) ** 2 + ) / 2 emb_loss = self.lmbd * regularizer / users.shape[0] return mf_loss + emb_loss, mf_loss, emb_loss @@ -96,9 +132,9 @@ def create_bpr_loss(self, users, pos_items, neg_items): def rating(self, u_g_embeddings, pos_i_g_embeddings): return torch.matmul(u_g_embeddings, pos_i_g_embeddings.t()) - def forward(self, g,user_key, item_key, users, pos_items, neg_items): - h_dict = {ntype : self.feature_dict[ntype] for ntype in g.ntypes} - #obtain features of each layer and concatenate them all + def forward(self, g, user_key, item_key, users, pos_items, neg_items): + h_dict = {ntype: self.feature_dict[ntype] 
for ntype in g.ntypes} + # obtain features of each layer and concatenate them all user_embeds = [] item_embeds = [] user_embeds.append(h_dict[user_key]) diff --git a/examples/pytorch/NGCF/NGCF/utility/batch_test.py b/examples/pytorch/NGCF/NGCF/utility/batch_test.py index ec7a38c90518..9747346da6fd 100644 --- a/examples/pytorch/NGCF/NGCF/utility/batch_test.py +++ b/examples/pytorch/NGCF/NGCF/utility/batch_test.py @@ -2,22 +2,26 @@ # . # It implements the batch test. +import heapq +import multiprocessing + import utility.metrics as metrics -from utility.parser import parse_args from utility.load_data import * -import multiprocessing -import heapq +from utility.parser import parse_args cores = multiprocessing.cpu_count() args = parse_args() Ks = eval(args.Ks) -data_generator = Data(path=args.data_path + args.dataset, batch_size=args.batch_size) +data_generator = Data( + path=args.data_path + args.dataset, batch_size=args.batch_size +) USR_NUM, ITEM_NUM = data_generator.n_users, data_generator.n_items N_TRAIN, N_TEST = data_generator.n_train, data_generator.n_test BATCH_SIZE = args.batch_size + def ranklist_by_heapq(user_pos_test, test_items, rating, Ks): item_score = {} for i in test_items: @@ -32,9 +36,10 @@ def ranklist_by_heapq(user_pos_test, test_items, rating, Ks): r.append(1) else: r.append(0) - auc = 0. + auc = 0.0 return r, auc + def get_auc(item_score, user_pos_test): item_score = sorted(item_score.items(), key=lambda kv: kv[1]) item_score.reverse() @@ -50,6 +55,7 @@ def get_auc(item_score, user_pos_test): auc = metrics.auc(ground_truth=r, prediction=posterior) return auc + def ranklist_by_sorted(user_pos_test, test_items, rating, Ks): item_score = {} for i in test_items: @@ -67,6 +73,7 @@ def ranklist_by_sorted(user_pos_test, test_items, rating, Ks): auc = get_auc(item_score, user_pos_test) return r, auc + def get_performance(user_pos_test, r, auc, Ks): precision, recall, ndcg, hit_ratio = [], [], [], [] @@ -76,28 +83,33 @@ def get_performance(user_pos_test, r, auc, Ks): ndcg.append(metrics.ndcg_at_k(r, K)) hit_ratio.append(metrics.hit_at_k(r, K)) - return {'recall': np.array(recall), 'precision': np.array(precision), - 'ndcg': np.array(ndcg), 'hit_ratio': np.array(hit_ratio), 'auc': auc} + return { + "recall": np.array(recall), + "precision": np.array(precision), + "ndcg": np.array(ndcg), + "hit_ratio": np.array(hit_ratio), + "auc": auc, + } def test_one_user(x): # user u's ratings for user u rating = x[0] - #uid + # uid u = x[1] - #user u's items in the training set + # user u's items in the training set try: training_items = data_generator.train_items[u] except Exception: training_items = [] - #user u's items in the test set + # user u's items in the test set user_pos_test = data_generator.test_set[u] all_items = set(range(ITEM_NUM)) test_items = list(all_items - set(training_items)) - if args.test_flag == 'part': + if args.test_flag == "part": r, auc = ranklist_by_heapq(user_pos_test, test_items, rating, Ks) else: r, auc = ranklist_by_sorted(user_pos_test, test_items, rating, Ks) @@ -106,8 +118,13 @@ def test_one_user(x): def test(model, g, users_to_test, batch_test_flag=False): - result = {'precision': np.zeros(len(Ks)), 'recall': np.zeros(len(Ks)), 'ndcg': np.zeros(len(Ks)), - 'hit_ratio': np.zeros(len(Ks)), 'auc': 0.} + result = { + "precision": np.zeros(len(Ks)), + "recall": np.zeros(len(Ks)), + "ndcg": np.zeros(len(Ks)), + "hit_ratio": np.zeros(len(Ks)), + "auc": 0.0, + } pool = multiprocessing.Pool(cores) @@ -124,7 +141,7 @@ def test(model, g, users_to_test, 
batch_test_flag=False): start = u_batch_id * u_batch_size end = (u_batch_id + 1) * u_batch_size - user_batch = test_users[start: end] + user_batch = test_users[start:end] if batch_test_flag: # batch-item test @@ -138,10 +155,16 @@ def test(model, g, users_to_test, batch_test_flag=False): item_batch = range(i_start, i_end) - u_g_embeddings, pos_i_g_embeddings, _ = model(g, 'user', 'item',user_batch, item_batch, []) - i_rate_batch = model.rating(u_g_embeddings, pos_i_g_embeddings).detach().cpu() + u_g_embeddings, pos_i_g_embeddings, _ = model( + g, "user", "item", user_batch, item_batch, [] + ) + i_rate_batch = ( + model.rating(u_g_embeddings, pos_i_g_embeddings) + .detach() + .cpu() + ) - rate_batch[:, i_start: i_end] = i_rate_batch + rate_batch[:, i_start:i_end] = i_rate_batch i_count += i_rate_batch.shape[1] assert i_count == ITEM_NUM @@ -149,20 +172,23 @@ def test(model, g, users_to_test, batch_test_flag=False): else: # all-item test item_batch = range(ITEM_NUM) - u_g_embeddings, pos_i_g_embeddings, _ = model(g, 'user', 'item',user_batch, item_batch, []) - rate_batch = model.rating(u_g_embeddings, pos_i_g_embeddings).detach().cpu() + u_g_embeddings, pos_i_g_embeddings, _ = model( + g, "user", "item", user_batch, item_batch, [] + ) + rate_batch = ( + model.rating(u_g_embeddings, pos_i_g_embeddings).detach().cpu() + ) user_batch_rating_uid = zip(rate_batch.numpy(), user_batch) batch_result = pool.map(test_one_user, user_batch_rating_uid) count += len(batch_result) for re in batch_result: - result['precision'] += re['precision']/n_test_users - result['recall'] += re['recall']/n_test_users - result['ndcg'] += re['ndcg']/n_test_users - result['hit_ratio'] += re['hit_ratio']/n_test_users - result['auc'] += re['auc']/n_test_users - + result["precision"] += re["precision"] / n_test_users + result["recall"] += re["recall"] / n_test_users + result["ndcg"] += re["ndcg"] / n_test_users + result["hit_ratio"] += re["hit_ratio"] / n_test_users + result["auc"] += re["auc"] / n_test_users assert count == n_test_users pool.close() diff --git a/examples/pytorch/NGCF/NGCF/utility/helper.py b/examples/pytorch/NGCF/NGCF/utility/helper.py index c4e5da929410..b80253036f52 100644 --- a/examples/pytorch/NGCF/NGCF/utility/helper.py +++ b/examples/pytorch/NGCF/NGCF/utility/helper.py @@ -1,52 +1,67 @@ # This file is copied from the NGCF author's implementation # . # It implements the helper functions. 
-''' +""" Created on Aug 19, 2016 @author: Xiang Wang (xiangwang@u.nus.edu) -''' +""" __author__ = "xiangwang" import os import re + def txt2list(file_src): orig_file = open(file_src, "r") lines = orig_file.readlines() return lines + def ensureDir(dir_path): d = os.path.dirname(dir_path) if not os.path.exists(d): os.makedirs(d) + def uni2str(unicode_str): - return str(unicode_str.encode('ascii', 'ignore')).replace('\n', '').strip() + return str(unicode_str.encode("ascii", "ignore")).replace("\n", "").strip() + def hasNumbers(inputString): - return bool(re.search(r'\d', inputString)) + return bool(re.search(r"\d", inputString)) + def delMultiChar(inputString, chars): for ch in chars: - inputString = inputString.replace(ch, '') + inputString = inputString.replace(ch, "") return inputString + def merge_two_dicts(x, y): - z = x.copy() # start with x's keys and values - z.update(y) # modifies z with y's keys and values & returns None + z = x.copy() # start with x's keys and values + z.update(y) # modifies z with y's keys and values & returns None return z -def early_stopping(log_value, best_value, stopping_step, expected_order='acc', flag_step=100): + +def early_stopping( + log_value, best_value, stopping_step, expected_order="acc", flag_step=100 +): # early stopping strategy: - assert expected_order in ['acc', 'dec'] + assert expected_order in ["acc", "dec"] - if (expected_order == 'acc' and log_value >= best_value) or (expected_order == 'dec' and log_value <= best_value): + if (expected_order == "acc" and log_value >= best_value) or ( + expected_order == "dec" and log_value <= best_value + ): stopping_step = 0 best_value = log_value else: stopping_step += 1 if stopping_step >= flag_step: - print("Early stopping is trigger at step: {} log:{}".format(flag_step, log_value)) + print( + "Early stopping is trigger at step: {} log:{}".format( + flag_step, log_value + ) + ) should_stop = True else: should_stop = False diff --git a/examples/pytorch/NGCF/NGCF/utility/load_data.py b/examples/pytorch/NGCF/NGCF/utility/load_data.py index 3b8a8a83c39d..ac85c9e7826b 100644 --- a/examples/pytorch/NGCF/NGCF/utility/load_data.py +++ b/examples/pytorch/NGCF/NGCF/utility/load_data.py @@ -1,19 +1,22 @@ # This file is based on the NGCF author's implementation # . # It implements the data processing and graph construction. 
-import numpy as np import random as rd + +import numpy as np + import dgl + class Data(object): def __init__(self, path, batch_size): self.path = path self.batch_size = batch_size - train_file = path + '/train.txt' - test_file = path + '/test.txt' + train_file = path + "/train.txt" + test_file = path + "/test.txt" - #get number of users and items + # get number of users and items self.n_users, self.n_items = 0, 0 self.n_train, self.n_test = 0, 0 self.exist_users = [] @@ -24,7 +27,7 @@ def __init__(self, path, batch_size): with open(train_file) as f: for l in f.readlines(): if len(l) > 0: - l = l.strip('\n').split(' ') + l = l.strip("\n").split(" ") items = [int(i) for i in l[1:]] uid = int(l[0]) self.exist_users.append(uid) @@ -38,9 +41,9 @@ def __init__(self, path, batch_size): with open(test_file) as f: for l in f.readlines(): if len(l) > 0: - l = l.strip('\n') + l = l.strip("\n") try: - items = [int(i) for i in l.split(' ')[1:]] + items = [int(i) for i in l.split(" ")[1:]] except Exception: continue self.n_items = max(self.n_items, max(items)) @@ -50,51 +53,51 @@ def __init__(self, path, batch_size): self.print_statistics() - #training positive items corresponding to each user; testing positive items corresponding to each user + # training positive items corresponding to each user; testing positive items corresponding to each user self.train_items, self.test_set = {}, {} with open(train_file) as f_train: with open(test_file) as f_test: for l in f_train.readlines(): if len(l) == 0: break - l = l.strip('\n') - items = [int(i) for i in l.split(' ')] + l = l.strip("\n") + items = [int(i) for i in l.split(" ")] uid, train_items = items[0], items[1:] self.train_items[uid] = train_items for l in f_test.readlines(): - if len(l) == 0: break - l = l.strip('\n') + if len(l) == 0: + break + l = l.strip("\n") try: - items = [int(i) for i in l.split(' ')] + items = [int(i) for i in l.split(" ")] except Exception: continue uid, test_items = items[0], items[1:] self.test_set[uid] = test_items - - #construct graph from the train data and add self-loops - user_selfs = [ i for i in range(self.n_users)] - item_selfs = [ i for i in range(self.n_items)] - + + # construct graph from the train data and add self-loops + user_selfs = [i for i in range(self.n_users)] + item_selfs = [i for i in range(self.n_items)] + data_dict = { - ('user', 'user_self', 'user') : (user_selfs, user_selfs), - ('item', 'item_self', 'item') : (item_selfs, item_selfs), - ('user', 'ui', 'item') : (user_item_src, user_item_dst), - ('item', 'iu', 'user') : (user_item_dst, user_item_src) - } - num_dict = { - 'user': self.n_users, 'item': self.n_items + ("user", "user_self", "user"): (user_selfs, user_selfs), + ("item", "item_self", "item"): (item_selfs, item_selfs), + ("user", "ui", "item"): (user_item_src, user_item_dst), + ("item", "iu", "user"): (user_item_dst, user_item_src), } + num_dict = {"user": self.n_users, "item": self.n_items} self.g = dgl.heterograph(data_dict, num_nodes_dict=num_dict) - def sample(self): if self.batch_size <= self.n_users: users = rd.sample(self.exist_users, self.batch_size) else: - users = [rd.choice(self.exist_users) for _ in range(self.batch_size)] + users = [ + rd.choice(self.exist_users) for _ in range(self.batch_size) + ] def sample_pos_items_for_u(u, num): # sample num pos items for u-th user @@ -117,12 +120,14 @@ def sample_neg_items_for_u(u, num): while True: if len(neg_items) == num: break - neg_id = np.random.randint(low=0, high=self.n_items,size=1)[0] - if neg_id not in self.train_items[u] and 
neg_id not in neg_items: + neg_id = np.random.randint(low=0, high=self.n_items, size=1)[0] + if ( + neg_id not in self.train_items[u] + and neg_id not in neg_items + ): neg_items.append(neg_id) return neg_items - pos_items, neg_items = [], [] for u in users: pos_items += sample_pos_items_for_u(u, 1) @@ -134,10 +139,13 @@ def get_num_users_items(self): return self.n_users, self.n_items def print_statistics(self): - print('n_users=%d, n_items=%d' % (self.n_users, self.n_items)) - print('n_interactions=%d' % (self.n_train + self.n_test)) - print('n_train=%d, n_test=%d, sparsity=%.5f' % (self.n_train, self.n_test, (self.n_train + self.n_test)/(self.n_users * self.n_items))) - - - - + print("n_users=%d, n_items=%d" % (self.n_users, self.n_items)) + print("n_interactions=%d" % (self.n_train + self.n_test)) + print( + "n_train=%d, n_test=%d, sparsity=%.5f" + % ( + self.n_train, + self.n_test, + (self.n_train + self.n_test) / (self.n_users * self.n_items), + ) + ) diff --git a/examples/pytorch/NGCF/NGCF/utility/metrics.py b/examples/pytorch/NGCF/NGCF/utility/metrics.py index c79cfb4ef75e..d869a7682fcf 100644 --- a/examples/pytorch/NGCF/NGCF/utility/metrics.py +++ b/examples/pytorch/NGCF/NGCF/utility/metrics.py @@ -1,18 +1,21 @@ # This file is copied from the NGCF author's implementation # . # It implements the metrics. -''' +""" Created on Oct 10, 2018 Tensorflow Implementation of Neural Graph Collaborative Filtering (NGCF) model in: Wang Xiang et al. Neural Graph Collaborative Filtering. In SIGIR 2019. @author: Xiang Wang (xiangwang@u.nus.edu) -''' +""" import numpy as np from sklearn.metrics import roc_auc_score + def recall(rank, ground_truth, N): - return len(set(rank[:N]) & set(ground_truth)) / float(len(set(ground_truth))) + return len(set(rank[:N]) & set(ground_truth)) / float( + len(set(ground_truth)) + ) def precision_at_k(r, k): @@ -28,7 +31,7 @@ def precision_at_k(r, k): return np.mean(r) -def average_precision(r,cut): +def average_precision(r, cut): """Score is average precision (area under PR curve) Relevance is binary (nonzero is relevant). Returns: @@ -37,8 +40,8 @@ def average_precision(r,cut): r = np.asarray(r) out = [precision_at_k(r, k + 1) for k in range(cut) if r[k]] if not out: - return 0. - return np.sum(out)/float(min(cut, np.sum(r))) + return 0.0 + return np.sum(out) / float(min(cut, np.sum(r))) def mean_average_precision(rs): @@ -64,8 +67,8 @@ def dcg_at_k(r, k, method=1): elif method == 1: return np.sum(r / np.log2(np.arange(2, r.size + 2))) else: - raise ValueError('method must be 0 or 1.') - return 0. + raise ValueError("method must be 0 or 1.") + return 0.0 def ndcg_at_k(r, k, method=1): @@ -77,7 +80,7 @@ def ndcg_at_k(r, k, method=1): """ dcg_max = dcg_at_k(sorted(r, reverse=True), k, method) if not dcg_max: - return 0. + return 0.0 return dcg_at_k(r, k, method) / dcg_max @@ -89,19 +92,21 @@ def recall_at_k(r, k, all_pos_num): def hit_at_k(r, k): r = np.array(r)[:k] if np.sum(r) > 0: - return 1. + return 1.0 else: - return 0. + return 0.0 + def F1(pre, rec): if pre + rec > 0: return (2.0 * pre * rec) / (pre + rec) else: - return 0. + return 0.0 + def auc(ground_truth, prediction): try: res = roc_auc_score(y_true=ground_truth, y_score=prediction) except Exception: - res = 0. 
- return res \ No newline at end of file + res = 0.0 + return res diff --git a/examples/pytorch/NGCF/NGCF/utility/parser.py b/examples/pytorch/NGCF/NGCF/utility/parser.py index 04ef5d36da89..7a153ce4f633 100644 --- a/examples/pytorch/NGCF/NGCF/utility/parser.py +++ b/examples/pytorch/NGCF/NGCF/utility/parser.py @@ -3,51 +3,88 @@ import argparse + def parse_args(): parser = argparse.ArgumentParser(description="Run NGCF.") - parser.add_argument('--weights_path', nargs='?', default='model/', - help='Store model path.') - parser.add_argument('--data_path', nargs='?', default='../Data/', - help='Input data path.') - parser.add_argument('--model_name', type=str, default='NGCF.pkl', - help='Saved model name.') - - - parser.add_argument('--dataset', nargs='?', default='gowalla', - help='Choose a dataset from {gowalla, yelp2018, amazon-book}') - parser.add_argument('--verbose', type=int, default=1, - help='Interval of evaluation.') - parser.add_argument('--epoch', type=int, default=400, - help='Number of epoch.') + parser.add_argument( + "--weights_path", nargs="?", default="model/", help="Store model path." + ) + parser.add_argument( + "--data_path", nargs="?", default="../Data/", help="Input data path." + ) + parser.add_argument( + "--model_name", type=str, default="NGCF.pkl", help="Saved model name." + ) - parser.add_argument('--embed_size', type=int, default=64, - help='Embedding size.') - parser.add_argument('--layer_size', nargs='?', default='[64,64,64]', - help='Output sizes of every layer') - parser.add_argument('--batch_size', type=int, default=1024, - help='Batch size.') + parser.add_argument( + "--dataset", + nargs="?", + default="gowalla", + help="Choose a dataset from {gowalla, yelp2018, amazon-book}", + ) + parser.add_argument( + "--verbose", type=int, default=1, help="Interval of evaluation." + ) + parser.add_argument( + "--epoch", type=int, default=400, help="Number of epoch." + ) - parser.add_argument('--regs', nargs='?', default='[1e-5]', - help='Regularizations.') - parser.add_argument('--lr', type=float, default=0.0001, - help='Learning rate.') + parser.add_argument( + "--embed_size", type=int, default=64, help="Embedding size." + ) + parser.add_argument( + "--layer_size", + nargs="?", + default="[64,64,64]", + help="Output sizes of every layer", + ) + parser.add_argument( + "--batch_size", type=int, default=1024, help="Batch size." + ) + parser.add_argument( + "--regs", nargs="?", default="[1e-5]", help="Regularizations." + ) + parser.add_argument( + "--lr", type=float, default=0.0001, help="Learning rate." + ) - parser.add_argument('--gpu', type=int, default=0, - help='0 for NAIS_prod, 1 for NAIS_concat') + parser.add_argument( + "--gpu", type=int, default=0, help="0 for NAIS_prod, 1 for NAIS_concat" + ) - parser.add_argument('--mess_dropout', nargs='?', default='[0.1,0.1,0.1]', - help='Keep probability w.r.t. message dropout (i.e., 1-dropout_ratio) for each deep layer. 1: no dropout.') + parser.add_argument( + "--mess_dropout", + nargs="?", + default="[0.1,0.1,0.1]", + help="Keep probability w.r.t. message dropout (i.e., 1-dropout_ratio) for each deep layer. 
1: no dropout.", + ) - parser.add_argument('--Ks', nargs='?', default='[20, 40]', - help='Output sizes of every layer') + parser.add_argument( + "--Ks", + nargs="?", + default="[20, 40]", + help="Output sizes of every layer", + ) - parser.add_argument('--save_flag', type=int, default=1, - help='0: Disable model saver, 1: Activate model saver') + parser.add_argument( + "--save_flag", + type=int, + default=1, + help="0: Disable model saver, 1: Activate model saver", + ) - parser.add_argument('--test_flag', nargs='?', default='part', - help='Specify the test type from {part, full}, indicating whether the reference is done in mini-batch') + parser.add_argument( + "--test_flag", + nargs="?", + default="part", + help="Specify the test type from {part, full}, indicating whether the reference is done in mini-batch", + ) - parser.add_argument('--report', type=int, default=0, - help='0: Disable performance report w.r.t. sparsity levels, 1: Show performance report w.r.t. sparsity levels') + parser.add_argument( + "--report", + type=int, + default=0, + help="0: Disable performance report w.r.t. sparsity levels, 1: Show performance report w.r.t. sparsity levels", + ) return parser.parse_args() diff --git a/examples/pytorch/P-GNN/main.py b/examples/pytorch/P-GNN/main.py index 0ec45e2fe6b4..a834f3018e3f 100644 --- a/examples/pytorch/P-GNN/main.py +++ b/examples/pytorch/P-GNN/main.py @@ -1,39 +1,66 @@ import os -import dgl -import torch +import warnings + import numpy as np +import torch import torch.nn as nn from model import PGNN from sklearn.metrics import roc_auc_score from utils import get_dataset, preselect_anchor -import warnings -warnings.filterwarnings('ignore') +import dgl + +warnings.filterwarnings("ignore") -def get_loss(p, data, out, loss_func, device, get_auc=True): - edge_mask = np.concatenate((data['positive_edges_{}'.format(p)], data['negative_edges_{}'.format(p)]), axis=-1) - nodes_first = torch.index_select(out, 0, torch.from_numpy(edge_mask[0, :]).long().to(out.device)) - nodes_second = torch.index_select(out, 0, torch.from_numpy(edge_mask[1, :]).long().to(out.device)) +def get_loss(p, data, out, loss_func, device, get_auc=True): + edge_mask = np.concatenate( + ( + data["positive_edges_{}".format(p)], + data["negative_edges_{}".format(p)], + ), + axis=-1, + ) + + nodes_first = torch.index_select( + out, 0, torch.from_numpy(edge_mask[0, :]).long().to(out.device) + ) + nodes_second = torch.index_select( + out, 0, torch.from_numpy(edge_mask[1, :]).long().to(out.device) + ) pred = torch.sum(nodes_first * nodes_second, dim=-1) - label_positive = torch.ones([data['positive_edges_{}'.format(p)].shape[1], ], dtype=pred.dtype) - label_negative = torch.zeros([data['negative_edges_{}'.format(p)].shape[1], ], dtype=pred.dtype) + label_positive = torch.ones( + [ + data["positive_edges_{}".format(p)].shape[1], + ], + dtype=pred.dtype, + ) + label_negative = torch.zeros( + [ + data["negative_edges_{}".format(p)].shape[1], + ], + dtype=pred.dtype, + ) label = torch.cat((label_positive, label_negative)).to(device) loss = loss_func(pred, label) if get_auc: - auc = roc_auc_score(label.flatten().cpu().numpy(), torch.sigmoid(pred).flatten().data.cpu().numpy()) + auc = roc_auc_score( + label.flatten().cpu().numpy(), + torch.sigmoid(pred).flatten().data.cpu().numpy(), + ) return loss, auc else: return loss + def train_model(data, model, loss_func, optimizer, device, g_data): model.train() out = model(g_data) - loss = get_loss('train', data, out, loss_func, device, get_auc=False) + loss = get_loss("train", 
data, out, loss_func, device, get_auc=False) optimizer.zero_grad() loss.backward() @@ -42,35 +69,41 @@ def train_model(data, model, loss_func, optimizer, device, g_data): return g_data + def eval_model(data, g_data, model, loss_func, device): model.eval() out = model(g_data) # train loss and auc - tmp_loss, auc_train = get_loss('train', data, out, loss_func, device) + tmp_loss, auc_train = get_loss("train", data, out, loss_func, device) loss_train = tmp_loss.cpu().data.numpy() # val loss and auc - _, auc_val = get_loss('val', data, out, loss_func, device) + _, auc_val = get_loss("val", data, out, loss_func, device) # test loss and auc - _, auc_test = get_loss('test', data, out, loss_func, device) + _, auc_test = get_loss("test", data, out, loss_func, device) return loss_train, auc_train, auc_val, auc_test + def main(args): # The mean and standard deviation of the experiment results # are stored in the 'results' folder - if not os.path.isdir('results'): - os.mkdir('results') + if not os.path.isdir("results"): + os.mkdir("results") if torch.cuda.is_available(): - device = 'cuda:0' + device = "cuda:0" else: - device = 'cpu' + device = "cpu" - print('Learning Type: {}'.format(['Transductive', 'Inductive'][args.inductive]), - 'Task: {}'.format(args.task)) + print( + "Learning Type: {}".format( + ["Transductive", "Inductive"][args.inductive] + ), + "Task: {}".format(args.task), + ) results = [] @@ -78,13 +111,20 @@ def main(args): data = get_dataset(args) # pre-sample anchor nodes and compute shortest distance values for all epochs - g_list, anchor_eid_list, dist_max_list, edge_weight_list = preselect_anchor(data, args) + ( + g_list, + anchor_eid_list, + dist_max_list, + edge_weight_list, + ) = preselect_anchor(data, args) # model - model = PGNN(input_dim=data['feature'].shape[1]).to(device) + model = PGNN(input_dim=data["feature"].shape[1]).to(device) # loss - optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4) + optimizer = torch.optim.Adam( + model.parameters(), lr=1e-2, weight_decay=5e-4 + ) loss_func = nn.BCEWithLogitsLoss() best_auc_val = -1 @@ -93,55 +133,79 @@ def main(args): for epoch in range(args.epoch_num): if epoch == 200: for param_group in optimizer.param_groups: - param_group['lr'] /= 10 + param_group["lr"] /= 10 g = dgl.graph(g_list[epoch]) - g.ndata['feat'] = torch.FloatTensor(data['feature']) - g.edata['sp_dist'] = torch.FloatTensor(edge_weight_list[epoch]) + g.ndata["feat"] = torch.FloatTensor(data["feature"]) + g.edata["sp_dist"] = torch.FloatTensor(edge_weight_list[epoch]) g_data = { - 'graph': g.to(device), - 'anchor_eid': anchor_eid_list[epoch], - 'dists_max': dist_max_list[epoch] + "graph": g.to(device), + "anchor_eid": anchor_eid_list[epoch], + "dists_max": dist_max_list[epoch], } train_model(data, model, loss_func, optimizer, device, g_data) loss_train, auc_train, auc_val, auc_test = eval_model( - data, g_data, model, loss_func, device) + data, g_data, model, loss_func, device + ) if auc_val > best_auc_val: best_auc_val = auc_val best_auc_test = auc_test if epoch % args.epoch_log == 0: - print(repeat, epoch, 'Loss {:.4f}'.format(loss_train), 'Train AUC: {:.4f}'.format(auc_train), - 'Val AUC: {:.4f}'.format(auc_val), 'Test AUC: {:.4f}'.format(auc_test), - 'Best Val AUC: {:.4f}'.format(best_auc_val), 'Best Test AUC: {:.4f}'.format(best_auc_test)) + print( + repeat, + epoch, + "Loss {:.4f}".format(loss_train), + "Train AUC: {:.4f}".format(auc_train), + "Val AUC: {:.4f}".format(auc_val), + "Test AUC: {:.4f}".format(auc_test), + "Best Val AUC: 
{:.4f}".format(best_auc_val), + "Best Test AUC: {:.4f}".format(best_auc_test), + ) results.append(best_auc_test) results = np.array(results) results_mean = np.mean(results).round(6) results_std = np.std(results).round(6) - print('-----------------Final-------------------') + print("-----------------Final-------------------") print(results_mean, results_std) - with open('results/{}_{}_{}.txt'.format(['Transductive', 'Inductive'][args.inductive], args.task, - args.k_hop_dist), 'w') as f: - f.write('{}, {}\n'.format(results_mean, results_std)) + with open( + "results/{}_{}_{}.txt".format( + ["Transductive", "Inductive"][args.inductive], + args.task, + args.k_hop_dist, + ), + "w", + ) as f: + f.write("{}, {}\n".format(results_mean, results_std)) + -if __name__ == '__main__': +if __name__ == "__main__": from argparse import ArgumentParser parser = ArgumentParser() - parser.add_argument('--task', type=str, default='link', choices=['link', 'link_pair']) - parser.add_argument('--inductive', action='store_true', - help='Inductive learning or transductive learning') - parser.add_argument('--k_hop_dist', default=-1, type=int, - help='K-hop shortest path distance, -1 means exact shortest path.') - - parser.add_argument('--epoch_num', type=int, default=2000) - parser.add_argument('--repeat_num', type=int, default=10) - parser.add_argument('--epoch_log', type=int, default=100) + parser.add_argument( + "--task", type=str, default="link", choices=["link", "link_pair"] + ) + parser.add_argument( + "--inductive", + action="store_true", + help="Inductive learning or transductive learning", + ) + parser.add_argument( + "--k_hop_dist", + default=-1, + type=int, + help="K-hop shortest path distance, -1 means exact shortest path.", + ) + + parser.add_argument("--epoch_num", type=int, default=2000) + parser.add_argument("--repeat_num", type=int, default=10) + parser.add_argument("--epoch_log", type=int, default=100) args = parser.parse_args() main(args) diff --git a/examples/pytorch/P-GNN/model.py b/examples/pytorch/P-GNN/model.py index 428a0b355d51..086b3b0e0168 100644 --- a/examples/pytorch/P-GNN/model.py +++ b/examples/pytorch/P-GNN/model.py @@ -1,8 +1,10 @@ import torch import torch.nn as nn -import dgl.function as fn import torch.nn.functional as F +import dgl.function as fn + + class PGNN_layer(nn.Module): def __init__(self, input_dim, output_dim): super(PGNN_layer, self).__init__() @@ -17,23 +19,31 @@ def forward(self, graph, feature, anchor_eid, dists_max): with graph.local_scope(): u_feat = self.linear_hidden_u(feature) v_feat = self.linear_hidden_v(feature) - graph.srcdata.update({'u_feat': u_feat}) - graph.dstdata.update({'v_feat': v_feat}) + graph.srcdata.update({"u_feat": u_feat}) + graph.dstdata.update({"v_feat": v_feat}) - graph.apply_edges(fn.u_mul_e('u_feat', 'sp_dist', 'u_message')) - graph.apply_edges(fn.v_add_e('v_feat', 'u_message', 'message')) + graph.apply_edges(fn.u_mul_e("u_feat", "sp_dist", "u_message")) + graph.apply_edges(fn.v_add_e("v_feat", "u_message", "message")) - messages = torch.index_select(graph.edata['message'], 0, - torch.LongTensor(anchor_eid).to(feature.device)) - messages = messages.reshape(dists_max.shape[0], dists_max.shape[1], messages.shape[-1]) + messages = torch.index_select( + graph.edata["message"], + 0, + torch.LongTensor(anchor_eid).to(feature.device), + ) + messages = messages.reshape( + dists_max.shape[0], dists_max.shape[1], messages.shape[-1] + ) messages = self.act(messages) # n*m*d - out_position = self.linear_out_position(messages).squeeze(-1) # n*m_out + 
out_position = self.linear_out_position(messages).squeeze( + -1 + ) # n*m_out out_structure = torch.mean(messages, dim=1) # n*d return out_position, out_structure + class PGNN(nn.Module): def __init__(self, input_dim, feature_dim=32, dropout=0.5): super(PGNN, self).__init__() @@ -44,12 +54,16 @@ def __init__(self, input_dim, feature_dim=32, dropout=0.5): self.conv_out = PGNN_layer(feature_dim, feature_dim) def forward(self, data): - x = data['graph'].ndata['feat'] - graph = data['graph'] + x = data["graph"].ndata["feat"] + graph = data["graph"] x = self.linear_pre(x) - x_position, x = self.conv_first(graph, x, data['anchor_eid'], data['dists_max']) + x_position, x = self.conv_first( + graph, x, data["anchor_eid"], data["dists_max"] + ) x = self.dropout(x) - x_position, x = self.conv_out(graph, x, data['anchor_eid'], data['dists_max']) + x_position, x = self.conv_out( + graph, x, data["anchor_eid"], data["dists_max"] + ) x_position = F.normalize(x_position, p=2, dim=-1) return x_position diff --git a/examples/pytorch/P-GNN/utils.py b/examples/pytorch/P-GNN/utils.py index 2074fff607c0..0b5e38f28867 100644 --- a/examples/pytorch/P-GNN/utils.py +++ b/examples/pytorch/P-GNN/utils.py @@ -1,10 +1,12 @@ -import torch +import multiprocessing as mp import random -import numpy as np +from multiprocessing import get_context + import networkx as nx +import numpy as np +import torch from tqdm.auto import tqdm -import multiprocessing as mp -from multiprocessing import get_context + def get_communities(remove_feature): community_size = 20 @@ -45,14 +47,15 @@ def get_communities(remove_feature): feature = np.identity(n)[:, rand_order] data = { - 'edge_index': edge_index, - 'feature': feature, - 'positive_edges': np.stack(np.nonzero(label)), - 'num_nodes': feature.shape[0] + "edge_index": edge_index, + "feature": feature, + "positive_edges": np.stack(np.nonzero(label)), + "num_nodes": feature.shape[0], } return data + def to_single_directed(edges): edges_new = np.zeros((2, edges.shape[1] // 2), dtype=int) j = 0 @@ -63,6 +66,7 @@ def to_single_directed(edges): return edges_new + # each node at least remain in the new graph def split_edges(p, edges, data, non_train_ratio=0.2): e = edges.shape[1] @@ -70,15 +74,19 @@ def split_edges(p, edges, data, non_train_ratio=0.2): split1 = int((1 - non_train_ratio) * e) split2 = int((1 - non_train_ratio / 2) * e) - data.update({ - '{}_edges_train'.format(p): edges[:, :split1], # 80% - '{}_edges_val'.format(p): edges[:, split1:split2], # 10% - '{}_edges_test'.format(p): edges[:, split2:] # 10% - }) + data.update( + { + "{}_edges_train".format(p): edges[:, :split1], # 80% + "{}_edges_val".format(p): edges[:, split1:split2], # 10% + "{}_edges_test".format(p): edges[:, split2:], # 10% + } + ) + def to_bidirected(edges): return np.concatenate((edges, edges[::-1, :]), axis=-1) + def get_negative_edges(positive_edges, num_nodes, num_negative_edges): positive_edge_set = [] positive_edges = to_bidirected(positive_edges) @@ -86,54 +94,76 @@ def get_negative_edges(positive_edges, num_nodes, num_negative_edges): positive_edge_set.append(tuple(positive_edges[:, i])) positive_edge_set = set(positive_edge_set) - negative_edges = np.zeros((2, num_negative_edges), dtype=positive_edges.dtype) + negative_edges = np.zeros( + (2, num_negative_edges), dtype=positive_edges.dtype + ) for i in range(num_negative_edges): while True: - mask_temp = tuple(np.random.choice(num_nodes, size=(2,), replace=False)) + mask_temp = tuple( + np.random.choice(num_nodes, size=(2,), replace=False) + ) if 
mask_temp not in positive_edge_set: negative_edges[:, i] = mask_temp break return negative_edges + def get_pos_neg_edges(data, infer_link_positive=True): if infer_link_positive: - data['positive_edges'] = to_single_directed(data['edge_index'].numpy()) - split_edges('positive', data['positive_edges'], data) + data["positive_edges"] = to_single_directed(data["edge_index"].numpy()) + split_edges("positive", data["positive_edges"], data) # resample edge mask link negative - negative_edges = get_negative_edges(data['positive_edges'], data['num_nodes'], - num_negative_edges=data['positive_edges'].shape[1]) - split_edges('negative', negative_edges, data) + negative_edges = get_negative_edges( + data["positive_edges"], + data["num_nodes"], + num_negative_edges=data["positive_edges"].shape[1], + ) + split_edges("negative", negative_edges, data) return data + def shortest_path(graph, node_range, cutoff): dists_dict = {} for node in tqdm(node_range, leave=False): - dists_dict[node] = nx.single_source_shortest_path_length(graph, node, cutoff) + dists_dict[node] = nx.single_source_shortest_path_length( + graph, node, cutoff + ) return dists_dict + def merge_dicts(dicts): result = {} for dictionary in dicts: result.update(dictionary) return result + def all_pairs_shortest_path(graph, cutoff=None, num_workers=4): nodes = list(graph.nodes) random.shuffle(nodes) pool = mp.Pool(processes=num_workers) interval_size = len(nodes) / num_workers - results = [pool.apply_async(shortest_path, args=( - graph, nodes[int(interval_size * i): int(interval_size * (i + 1))], cutoff)) - for i in range(num_workers)] + results = [ + pool.apply_async( + shortest_path, + args=( + graph, + nodes[int(interval_size * i) : int(interval_size * (i + 1))], + cutoff, + ), + ) + for i in range(num_workers) + ] output = [p.get() for p in results] dists_dict = merge_dicts(output) pool.close() pool.join() return dists_dict + def precompute_dist_data(edge_index, num_nodes, approximate=0): """ Here dist is 1/real_dist, higher actually means closer, 0 means disconnected @@ -145,7 +175,9 @@ def precompute_dist_data(edge_index, num_nodes, approximate=0): n = num_nodes dists_array = np.zeros((n, n)) - dists_dict = all_pairs_shortest_path(graph, cutoff=approximate if approximate > 0 else None) + dists_dict = all_pairs_shortest_path( + graph, cutoff=approximate if approximate > 0 else None + ) node_list = graph.nodes() for node_i in node_list: shortest_dist = dists_dict[node_i] @@ -155,24 +187,36 @@ def precompute_dist_data(edge_index, num_nodes, approximate=0): dists_array[node_i, node_j] = 1 / (dist + 1) return dists_array + def get_dataset(args): # Generate graph data data_info = get_communities(args.inductive) # Get positive and negative edges - data = get_pos_neg_edges(data_info, infer_link_positive=True if args.task == 'link' else False) + data = get_pos_neg_edges( + data_info, infer_link_positive=True if args.task == "link" else False + ) # Pre-compute shortest path length - if args.task == 'link': - dists_removed = precompute_dist_data(data['positive_edges_train'], data['num_nodes'], - approximate=args.k_hop_dist) - data['dists'] = torch.from_numpy(dists_removed).float() - data['edge_index'] = torch.from_numpy(to_bidirected(data['positive_edges_train'])).long() + if args.task == "link": + dists_removed = precompute_dist_data( + data["positive_edges_train"], + data["num_nodes"], + approximate=args.k_hop_dist, + ) + data["dists"] = torch.from_numpy(dists_removed).float() + data["edge_index"] = torch.from_numpy( + 
to_bidirected(data["positive_edges_train"]) + ).long() else: - dists = precompute_dist_data(data['edge_index'].numpy(), data['num_nodes'], - approximate=args.k_hop_dist) - data['dists'] = torch.from_numpy(dists).float() + dists = precompute_dist_data( + data["edge_index"].numpy(), + data["num_nodes"], + approximate=args.k_hop_dist, + ) + data["dists"] = torch.from_numpy(dists).float() return data + def get_anchors(n): """Get a list of NumPy arrays, each of them is an anchor node set""" m = int(np.log2(n)) @@ -180,9 +224,12 @@ def get_anchors(n): for i in range(m): anchor_size = int(n / np.exp2(i + 1)) for _ in range(m): - anchor_set_id.append(np.random.choice(n, size=anchor_size, replace=False)) + anchor_set_id.append( + np.random.choice(n, size=anchor_size, replace=False) + ) return anchor_set_id + def get_dist_max(anchor_set_id, dist): # N x K, N is number of nodes, K is the number of anchor sets dist_max = torch.zeros((dist.shape[0], len(anchor_set_id))) @@ -198,6 +245,7 @@ def get_dist_max(anchor_set_id, dist): dist_argmax[:, i] = torch.index_select(temp_id, 0, dist_argmax_temp) return dist_max, dist_argmax + def get_a_graph(dists_max, dists_argmax): src = [] dst = [] @@ -207,7 +255,9 @@ def get_a_graph(dists_max, dists_argmax): dists_max = dists_max.numpy() for i in range(dists_max.shape[0]): # Get unique closest anchor nodes for node i across all anchor sets - tmp_dists_argmax, tmp_dists_argmax_idx = np.unique(dists_argmax[i, :], True) + tmp_dists_argmax, tmp_dists_argmax_idx = np.unique( + dists_argmax[i, :], True + ) src.extend([i] * tmp_dists_argmax.shape[0]) real_src.extend([i] * dists_argmax[i, :].shape[0]) real_dst.extend(list(dists_argmax[i, :].numpy())) @@ -218,13 +268,14 @@ def get_a_graph(dists_max, dists_argmax): g = (dst, src) return g, anchor_eid, edge_weight + def get_graphs(data, anchor_sets): graphs = [] anchor_eids = [] dists_max_list = [] edge_weights = [] for anchor_set in tqdm(anchor_sets, leave=False): - dists_max, dists_argmax = get_dist_max(anchor_set, data['dists']) + dists_max, dists_argmax = get_dist_max(anchor_set, data["dists"]) g, anchor_eid, edge_weight = get_a_graph(dists_max, dists_argmax) graphs.append(g) anchor_eids.append(anchor_eid) @@ -233,6 +284,7 @@ def get_graphs(data, anchor_sets): return graphs, anchor_eids, dists_max_list, edge_weights + def merge_result(outputs): graphs = [] anchor_eids = [] @@ -247,14 +299,26 @@ def merge_result(outputs): return graphs, anchor_eids, dists_max_list, edge_weights + def preselect_anchor(data, args, num_workers=4): pool = get_context("spawn").Pool(processes=num_workers) # Pre-compute anchor sets, a collection of anchor sets per epoch - anchor_set_ids = [get_anchors(data['num_nodes']) for _ in range(args.epoch_num)] + anchor_set_ids = [ + get_anchors(data["num_nodes"]) for _ in range(args.epoch_num) + ] interval_size = len(anchor_set_ids) / num_workers - results = [pool.apply_async(get_graphs, args=( - data, anchor_set_ids[int(interval_size * i):int(interval_size * (i + 1))],)) - for i in range(num_workers)] + results = [ + pool.apply_async( + get_graphs, + args=( + data, + anchor_set_ids[ + int(interval_size * i) : int(interval_size * (i + 1)) + ], + ), + ) + for i in range(num_workers) + ] output = [p.get() for p in results] graphs, anchor_eids, dists_max_list, edge_weights = merge_result(output) diff --git a/examples/pytorch/infograph/utils.py b/examples/pytorch/infograph/utils.py index a0291a92409f..f0d95b4efd44 100644 --- a/examples/pytorch/infograph/utils.py +++ b/examples/pytorch/infograph/utils.py @@ 
-1,9 +1,10 @@ -''' Credit: https://github.com/fanyun-sun/InfoGraph ''' +""" Credit: https://github.com/fanyun-sun/InfoGraph """ + +import math import torch as th import torch.nn.functional as F -import math def get_positive_expectation(p_samples, average=True): """Computes the positive part of a JS Divergence. @@ -13,8 +14,8 @@ def get_positive_expectation(p_samples, average=True): Returns: th.Tensor """ - log_2 = math.log(2.) - Ep = log_2 - F.softplus(- p_samples) + log_2 = math.log(2.0) + Ep = log_2 - F.softplus(-p_samples) if average: return Ep.mean() @@ -30,7 +31,7 @@ def get_negative_expectation(q_samples, average=True): Returns: th.Tensor """ - log_2 = math.log(2.) + log_2 = math.log(2.0) Eq = F.softplus(-q_samples) + q_samples - log_2 if average: @@ -51,8 +52,8 @@ def local_global_loss_(l_enc, g_enc, graph_id): for nodeidx, graphidx in enumerate(graph_id): - pos_mask[nodeidx][graphidx] = 1. - neg_mask[nodeidx][graphidx] = 0. + pos_mask[nodeidx][graphidx] = 1.0 + neg_mask[nodeidx][graphidx] = 0.0 res = th.mm(l_enc, g_enc.t()) diff --git a/examples/pytorch/jknet/main.py b/examples/pytorch/jknet/main.py index 3a7c32ccb37a..e6683ee6c460 100644 --- a/examples/pytorch/jknet/main.py +++ b/examples/pytorch/jknet/main.py @@ -2,39 +2,45 @@ import argparse import copy + +import numpy as np import torch -import torch.optim as optim import torch.nn as nn -import numpy as np - -from dgl.data import CoraGraphDataset, CiteseerGraphDataset -from tqdm import trange -from sklearn.model_selection import train_test_split +import torch.optim as optim from model import JKNet +from sklearn.model_selection import train_test_split +from tqdm import trange + +from dgl.data import CiteseerGraphDataset, CoraGraphDataset + def main(args): # Step 1: Prepare graph data and retrieve train/validation/test index ============================= # # Load from DGL dataset - if args.dataset == 'Cora': + if args.dataset == "Cora": dataset = CoraGraphDataset() - elif args.dataset == 'Citeseer': + elif args.dataset == "Citeseer": dataset = CiteseerGraphDataset() else: - raise ValueError('Dataset {} is invalid.'.format(args.dataset)) - + raise ValueError("Dataset {} is invalid.".format(args.dataset)) + graph = dataset[0] # check cuda - device = f'cuda:{args.gpu}' if args.gpu >= 0 and torch.cuda.is_available() else 'cpu' + device = ( + f"cuda:{args.gpu}" + if args.gpu >= 0 and torch.cuda.is_available() + else "cpu" + ) # retrieve the number of classes n_classes = dataset.num_classes # retrieve labels of ground truth - labels = graph.ndata.pop('label').to(device).long() + labels = graph.ndata.pop("label").to(device).long() # Extract node features - feats = graph.ndata.pop('feat').to(device) + feats = graph.ndata.pop("feat").to(device) n_features = feats.shape[-1] # create masks for train / validation / test @@ -47,13 +53,15 @@ def main(args): graph = graph.to(device) # Step 2: Create model =================================================================== # - model = JKNet(in_dim=n_features, - hid_dim=args.hid_dim, - out_dim=n_classes, - num_layers=args.num_layers, - mode=args.mode, - dropout=args.dropout).to(device) - + model = JKNet( + in_dim=n_features, + hid_dim=args.hid_dim, + out_dim=n_classes, + num_layers=args.num_layers, + mode=args.mode, + dropout=args.dropout, + ).to(device) + best_model = copy.deepcopy(model) # Step 3: Create training components ===================================================== # @@ -62,7 +70,7 @@ def main(args): # Step 4: training epochs 
=============================================================== # acc = 0 - epochs = trange(args.epochs, desc='Accuracy & Loss') + epochs = trange(args.epochs, desc="Accuracy & Loss") for _ in epochs: # Training using a full graph @@ -72,7 +80,9 @@ def main(args): # compute loss train_loss = loss_fn(logits[train_idx], labels[train_idx]) - train_acc = torch.sum(logits[train_idx].argmax(dim=1) == labels[train_idx]).item() / len(train_idx) + train_acc = torch.sum( + logits[train_idx].argmax(dim=1) == labels[train_idx] + ).item() / len(train_idx) # backward opt.zero_grad() @@ -84,43 +94,74 @@ def main(args): with torch.no_grad(): valid_loss = loss_fn(logits[val_idx], labels[val_idx]) - valid_acc = torch.sum(logits[val_idx].argmax(dim=1) == labels[val_idx]).item() / len(val_idx) + valid_acc = torch.sum( + logits[val_idx].argmax(dim=1) == labels[val_idx] + ).item() / len(val_idx) # Print out performance - epochs.set_description('Train Acc {:.4f} | Train Loss {:.4f} | Val Acc {:.4f} | Val loss {:.4f}'.format( - train_acc, train_loss.item(), valid_acc, valid_loss.item())) - + epochs.set_description( + "Train Acc {:.4f} | Train Loss {:.4f} | Val Acc {:.4f} | Val loss {:.4f}".format( + train_acc, train_loss.item(), valid_acc, valid_loss.item() + ) + ) + if valid_acc > acc: acc = valid_acc best_model = copy.deepcopy(model) best_model.eval() logits = best_model(graph, feats) - test_acc = torch.sum(logits[test_idx].argmax(dim=1) == labels[test_idx]).item() / len(test_idx) + test_acc = torch.sum( + logits[test_idx].argmax(dim=1) == labels[test_idx] + ).item() / len(test_idx) print("Test Acc {:.4f}".format(test_acc)) return test_acc + if __name__ == "__main__": """ JKNet Hyperparameters """ - parser = argparse.ArgumentParser(description='JKNet') + parser = argparse.ArgumentParser(description="JKNet") # data source params - parser.add_argument('--dataset', type=str, default='Cora', help='Name of dataset.') + parser.add_argument( + "--dataset", type=str, default="Cora", help="Name of dataset." + ) # cuda params - parser.add_argument('--gpu', type=int, default=-1, help='GPU index. Default: -1, using CPU.') + parser.add_argument( + "--gpu", type=int, default=-1, help="GPU index. Default: -1, using CPU." + ) # training params - parser.add_argument('--run', type=int, default=10, help='Running times.') - parser.add_argument('--epochs', type=int, default=500, help='Training epochs.') - parser.add_argument('--lr', type=float, default=0.005, help='Learning rate.') - parser.add_argument('--lamb', type=float, default=0.0005, help='L2 reg.') + parser.add_argument("--run", type=int, default=10, help="Running times.") + parser.add_argument( + "--epochs", type=int, default=500, help="Training epochs." + ) + parser.add_argument( + "--lr", type=float, default=0.005, help="Learning rate." + ) + parser.add_argument("--lamb", type=float, default=0.0005, help="L2 reg.") # model params - parser.add_argument("--hid-dim", type=int, default=32, help='Hidden layer dimensionalities.') - parser.add_argument("--num-layers", type=int, default=5, help='Number of GCN layers.') - parser.add_argument("--mode", type=str, default='cat', help="Type of aggregation.", choices=['cat', 'max', 'lstm']) - parser.add_argument("--dropout", type=float, default=0.5, help='Dropout applied at all layers.') + parser.add_argument( + "--hid-dim", type=int, default=32, help="Hidden layer dimensionalities." + ) + parser.add_argument( + "--num-layers", type=int, default=5, help="Number of GCN layers." 
+ ) + parser.add_argument( + "--mode", + type=str, + default="cat", + help="Type of aggregation.", + choices=["cat", "max", "lstm"], + ) + parser.add_argument( + "--dropout", + type=float, + default=0.5, + help="Dropout applied at all layers.", + ) args = parser.parse_args() print(args) @@ -132,6 +173,6 @@ def main(args): mean = np.around(np.mean(acc_lists, axis=0), decimals=3) std = np.around(np.std(acc_lists, axis=0), decimals=3) - print('total acc: ', acc_lists) - print('mean', mean) - print('std', std) + print("total acc: ", acc_lists) + print("mean", mean) + print("std", std) diff --git a/examples/pytorch/jknet/model.py b/examples/pytorch/jknet/model.py index 40512b170754..eed4a1ed8504 100644 --- a/examples/pytorch/jknet/model.py +++ b/examples/pytorch/jknet/model.py @@ -1,17 +1,15 @@ import torch import torch.nn as nn import torch.nn.functional as F + import dgl.function as fn from dgl.nn import GraphConv, JumpingKnowledge + class JKNet(nn.Module): - def __init__(self, - in_dim, - hid_dim, - out_dim, - num_layers=1, - mode='cat', - dropout=0.): + def __init__( + self, in_dim, hid_dim, out_dim, num_layers=1, mode="cat", dropout=0.0 + ): super(JKNet, self).__init__() self.mode = mode @@ -21,12 +19,12 @@ def __init__(self, for _ in range(num_layers): self.layers.append(GraphConv(hid_dim, hid_dim, activation=F.relu)) - if self.mode == 'lstm': + if self.mode == "lstm": self.jump = JumpingKnowledge(mode, hid_dim, num_layers) else: self.jump = JumpingKnowledge(mode) - if self.mode == 'cat': + if self.mode == "cat": hid_dim = hid_dim * (num_layers + 1) self.output = nn.Linear(hid_dim, out_dim) @@ -44,7 +42,7 @@ def forward(self, g, feats): feats = self.dropout(layer(g, feats)) feat_lst.append(feats) - g.ndata['h'] = self.jump(feat_lst) - g.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h')) + g.ndata["h"] = self.jump(feat_lst) + g.update_all(fn.copy_u("h", "m"), fn.sum("m", "h")) - return self.output(g.ndata['h']) + return self.output(g.ndata["h"]) diff --git a/examples/pytorch/jtnn/jtnn/__init__.py b/examples/pytorch/jtnn/jtnn/__init__.py index b10e6bda5625..f1dfe61d3872 100644 --- a/examples/pytorch/jtnn/jtnn/__init__.py +++ b/examples/pytorch/jtnn/jtnn/__init__.py @@ -1,6 +1,6 @@ -from .mol_tree import Vocab +from .chemutils import decode_stereo +from .datautils import JTNNCollator, JTNNDataset from .jtnn_vae import DGLJTNNVAE +from .mol_tree import Vocab from .mpn import DGLMPN from .nnutils import cuda -from .datautils import JTNNDataset, JTNNCollator -from .chemutils import decode_stereo diff --git a/examples/pytorch/jtnn/jtnn/chemutils.py b/examples/pytorch/jtnn/jtnn/chemutils.py index 215779f1b901..88ab8c21850f 100644 --- a/examples/pytorch/jtnn/jtnn/chemutils.py +++ b/examples/pytorch/jtnn/jtnn/chemutils.py @@ -1,43 +1,59 @@ +from collections import defaultdict + import rdkit.Chem as Chem +from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers from scipy.sparse import csr_matrix from scipy.sparse.csgraph import minimum_spanning_tree -from collections import defaultdict -from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers -MST_MAX_WEIGHT = 100 +MST_MAX_WEIGHT = 100 MAX_NCAND = 2000 + def set_atommap(mol, num=0): for atom in mol.GetAtoms(): atom.SetAtomMapNum(num) + def get_mol(smiles): mol = Chem.MolFromSmiles(smiles) - if mol is None: + if mol is None: return None Chem.Kekulize(mol) return mol + def get_smiles(mol): return Chem.MolToSmiles(mol, kekuleSmiles=True) + def decode_stereo(smiles2D): mol = Chem.MolFromSmiles(smiles2D) dec_isomers = 
list(EnumerateStereoisomers(mol)) - dec_isomers = [Chem.MolFromSmiles(Chem.MolToSmiles(mol, isomericSmiles=True)) for mol in dec_isomers] - smiles3D = [Chem.MolToSmiles(mol, isomericSmiles=True) for mol in dec_isomers] - - chiralN = [atom.GetIdx() for atom in dec_isomers[0].GetAtoms() - if int(atom.GetChiralTag()) > 0 and atom.GetSymbol() == "N"] + dec_isomers = [ + Chem.MolFromSmiles(Chem.MolToSmiles(mol, isomericSmiles=True)) + for mol in dec_isomers + ] + smiles3D = [ + Chem.MolToSmiles(mol, isomericSmiles=True) for mol in dec_isomers + ] + + chiralN = [ + atom.GetIdx() + for atom in dec_isomers[0].GetAtoms() + if int(atom.GetChiralTag()) > 0 and atom.GetSymbol() == "N" + ] if len(chiralN) > 0: for mol in dec_isomers: for idx in chiralN: - mol.GetAtomWithIdx(idx).SetChiralTag(Chem.rdchem.ChiralType.CHI_UNSPECIFIED) + mol.GetAtomWithIdx(idx).SetChiralTag( + Chem.rdchem.ChiralType.CHI_UNSPECIFIED + ) smiles3D.append(Chem.MolToSmiles(mol, isomericSmiles=True)) return smiles3D + def sanitize(mol): try: smiles = get_smiles(mol) @@ -46,14 +62,16 @@ def sanitize(mol): return None return mol + def copy_atom(atom): new_atom = Chem.Atom(atom.GetSymbol()) new_atom.SetFormalCharge(atom.GetFormalCharge()) new_atom.SetAtomMapNum(atom.GetAtomMapNum()) return new_atom + def copy_edit_mol(mol): - new_mol = Chem.RWMol(Chem.MolFromSmiles('')) + new_mol = Chem.RWMol(Chem.MolFromSmiles("")) for atom in mol.GetAtoms(): new_atom = copy_atom(atom) new_mol.AddAtom(new_atom) @@ -64,13 +82,15 @@ def copy_edit_mol(mol): new_mol.AddBond(a1, a2, bt) return new_mol + def get_clique_mol(mol, atoms): smiles = Chem.MolFragmentToSmiles(mol, atoms, kekuleSmiles=True) new_mol = Chem.MolFromSmiles(smiles, sanitize=False) new_mol = copy_edit_mol(new_mol).GetMol() - new_mol = sanitize(new_mol) #We assume this is not None + new_mol = sanitize(new_mol) # We assume this is not None return new_mol + def tree_decomp(mol): n_atoms = mol.GetNumAtoms() if n_atoms == 1: @@ -81,7 +101,7 @@ def tree_decomp(mol): a1 = bond.GetBeginAtom().GetIdx() a2 = bond.GetEndAtom().GetIdx() if not bond.IsInRing(): - cliques.append([a1,a2]) + cliques.append([a1, a2]) ssr = [list(x) for x in Chem.GetSymmSSSR(mol)] cliques.extend(ssr) @@ -90,29 +110,31 @@ def tree_decomp(mol): for i in range(len(cliques)): for atom in cliques[i]: nei_list[atom].append(i) - - #Merge Rings with intersection > 2 atoms + + # Merge Rings with intersection > 2 atoms for i in range(len(cliques)): - if len(cliques[i]) <= 2: continue + if len(cliques[i]) <= 2: + continue for atom in cliques[i]: for j in nei_list[atom]: - if i >= j or len(cliques[j]) <= 2: continue + if i >= j or len(cliques[j]) <= 2: + continue inter = set(cliques[i]) & set(cliques[j]) if len(inter) > 2: cliques[i].extend(cliques[j]) cliques[i] = list(set(cliques[i])) cliques[j] = [] - + cliques = [c for c in cliques if len(c) > 0] nei_list = [[] for i in range(n_atoms)] for i in range(len(cliques)): for atom in cliques[i]: nei_list[atom].append(i) - - #Build edges and add singleton cliques + + # Build edges and add singleton cliques edges = defaultdict(int) for atom in range(n_atoms): - if len(nei_list[atom]) <= 1: + if len(nei_list[atom]) <= 1: continue cnei = nei_list[atom] bonds = [c for c in cnei if len(cliques[c]) == 2] @@ -122,37 +144,44 @@ def tree_decomp(mol): cliques.append([atom]) c2 = len(cliques) - 1 for c1 in cnei: - edges[(c1,c2)] = 1 - elif len(rings) > 2: #Multiple (n>2) complex rings + edges[(c1, c2)] = 1 + elif len(rings) > 2: # Multiple (n>2) complex rings cliques.append([atom]) c2 = 
len(cliques) - 1 for c1 in cnei: - edges[(c1,c2)] = MST_MAX_WEIGHT - 1 + edges[(c1, c2)] = MST_MAX_WEIGHT - 1 else: for i in range(len(cnei)): for j in range(i + 1, len(cnei)): - c1,c2 = cnei[i],cnei[j] + c1, c2 = cnei[i], cnei[j] inter = set(cliques[c1]) & set(cliques[c2]) - if edges[(c1,c2)] < len(inter): - edges[(c1,c2)] = len(inter) #cnei[i] < cnei[j] by construction + if edges[(c1, c2)] < len(inter): + edges[(c1, c2)] = len( + inter + ) # cnei[i] < cnei[j] by construction - edges = [u + (MST_MAX_WEIGHT-v,) for u,v in edges.items()] + edges = [u + (MST_MAX_WEIGHT - v,) for u, v in edges.items()] if len(edges) == 0: return cliques, edges - #Compute Maximum Spanning Tree - row,col,data = list(zip(*edges)) + # Compute Maximum Spanning Tree + row, col, data = list(zip(*edges)) n_clique = len(cliques) - clique_graph = csr_matrix( (data,(row,col)), shape=(n_clique,n_clique) ) + clique_graph = csr_matrix((data, (row, col)), shape=(n_clique, n_clique)) junc_tree = minimum_spanning_tree(clique_graph) - row,col = junc_tree.nonzero() - edges = [(row[i],col[i]) for i in range(len(row))] + row, col = junc_tree.nonzero() + edges = [(row[i], col[i]) for i in range(len(row))] return (cliques, edges) + def atom_equal(a1, a2): - return a1.GetSymbol() == a2.GetSymbol() and a1.GetFormalCharge() == a2.GetFormalCharge() + return ( + a1.GetSymbol() == a2.GetSymbol() + and a1.GetFormalCharge() == a2.GetFormalCharge() + ) + -#Bond type not considered because all aromatic (so SINGLE matches DOUBLE) +# Bond type not considered because all aromatic (so SINGLE matches DOUBLE) def ring_bond_equal(b1, b2, reverse=False): b1 = (b1.GetBeginAtom(), b1.GetEndAtom()) if reverse: @@ -161,10 +190,11 @@ def ring_bond_equal(b1, b2, reverse=False): b2 = (b2.GetBeginAtom(), b2.GetEndAtom()) return atom_equal(b1[0], b2[0]) and atom_equal(b1[1], b2[1]) + def attach_mols_nx(ctr_mol, neighbors, prev_nodes, nei_amap): - prev_nids = [node['nid'] for node in prev_nodes] + prev_nids = [node["nid"] for node in prev_nodes] for nei_node in prev_nodes + neighbors: - nei_id, nei_mol = nei_node['nid'], nei_node['mol'] + nei_id, nei_mol = nei_node["nid"], nei_node["mol"] amap = nei_amap[nei_id] for atom in nei_mol.GetAtoms(): if atom.GetIdx() not in amap: @@ -181,82 +211,116 @@ def attach_mols_nx(ctr_mol, neighbors, prev_nodes, nei_amap): a2 = amap[bond.GetEndAtom().GetIdx()] if ctr_mol.GetBondBetweenAtoms(a1, a2) is None: ctr_mol.AddBond(a1, a2, bond.GetBondType()) - elif nei_id in prev_nids: #father node overrides + elif nei_id in prev_nids: # father node overrides ctr_mol.RemoveBond(a1, a2) ctr_mol.AddBond(a1, a2, bond.GetBondType()) return ctr_mol + def local_attach_nx(ctr_mol, neighbors, prev_nodes, amap_list): ctr_mol = copy_edit_mol(ctr_mol) - nei_amap = {nei['nid']: {} for nei in prev_nodes + neighbors} + nei_amap = {nei["nid"]: {} for nei in prev_nodes + neighbors} - for nei_id,ctr_atom,nei_atom in amap_list: + for nei_id, ctr_atom, nei_atom in amap_list: nei_amap[nei_id][nei_atom] = ctr_atom ctr_mol = attach_mols_nx(ctr_mol, neighbors, prev_nodes, nei_amap) return ctr_mol.GetMol() -#This version records idx mapping between ctr_mol and nei_mol + +# This version records idx mapping between ctr_mol and nei_mol def enum_attach_nx(ctr_mol, nei_node, amap, singletons): - nei_mol,nei_idx = nei_node['mol'], nei_node['nid'] + nei_mol, nei_idx = nei_node["mol"], nei_node["nid"] att_confs = [] - black_list = [atom_idx for nei_id,atom_idx,_ in amap if nei_id in singletons] - ctr_atoms = [atom for atom in ctr_mol.GetAtoms() if atom.GetIdx() not 
in black_list] + black_list = [ + atom_idx for nei_id, atom_idx, _ in amap if nei_id in singletons + ] + ctr_atoms = [ + atom for atom in ctr_mol.GetAtoms() if atom.GetIdx() not in black_list + ] ctr_bonds = [bond for bond in ctr_mol.GetBonds()] - if nei_mol.GetNumBonds() == 0: #neighbor singleton + if nei_mol.GetNumBonds() == 0: # neighbor singleton nei_atom = nei_mol.GetAtomWithIdx(0) - used_list = [atom_idx for _,atom_idx,_ in amap] + used_list = [atom_idx for _, atom_idx, _ in amap] for atom in ctr_atoms: if atom_equal(atom, nei_atom) and atom.GetIdx() not in used_list: new_amap = amap + [(nei_idx, atom.GetIdx(), 0)] - att_confs.append( new_amap ) - - elif nei_mol.GetNumBonds() == 1: #neighbor is a bond + att_confs.append(new_amap) + + elif nei_mol.GetNumBonds() == 1: # neighbor is a bond bond = nei_mol.GetBondWithIdx(0) bond_val = int(bond.GetBondTypeAsDouble()) - b1,b2 = bond.GetBeginAtom(), bond.GetEndAtom() + b1, b2 = bond.GetBeginAtom(), bond.GetEndAtom() - for atom in ctr_atoms: - #Optimize if atom is carbon (other atoms may change valence) + for atom in ctr_atoms: + # Optimize if atom is carbon (other atoms may change valence) if atom.GetAtomicNum() == 6 and atom.GetTotalNumHs() < bond_val: continue if atom_equal(atom, b1): new_amap = amap + [(nei_idx, atom.GetIdx(), b1.GetIdx())] - att_confs.append( new_amap ) + att_confs.append(new_amap) elif atom_equal(atom, b2): new_amap = amap + [(nei_idx, atom.GetIdx(), b2.GetIdx())] - att_confs.append( new_amap ) - else: - #intersection is an atom + att_confs.append(new_amap) + else: + # intersection is an atom for a1 in ctr_atoms: for a2 in nei_mol.GetAtoms(): if atom_equal(a1, a2): - #Optimize if atom is carbon (other atoms may change valence) - if a1.GetAtomicNum() == 6 and a1.GetTotalNumHs() + a2.GetTotalNumHs() < 4: + # Optimize if atom is carbon (other atoms may change valence) + if ( + a1.GetAtomicNum() == 6 + and a1.GetTotalNumHs() + a2.GetTotalNumHs() < 4 + ): continue new_amap = amap + [(nei_idx, a1.GetIdx(), a2.GetIdx())] - att_confs.append( new_amap ) + att_confs.append(new_amap) - #intersection is an bond + # intersection is an bond if ctr_mol.GetNumBonds() > 1: for b1 in ctr_bonds: for b2 in nei_mol.GetBonds(): if ring_bond_equal(b1, b2): - new_amap = amap + [(nei_idx, b1.GetBeginAtom().GetIdx(), b2.GetBeginAtom().GetIdx()), - (nei_idx, b1.GetEndAtom().GetIdx(), b2.GetEndAtom().GetIdx())] - att_confs.append( new_amap ) + new_amap = amap + [ + ( + nei_idx, + b1.GetBeginAtom().GetIdx(), + b2.GetBeginAtom().GetIdx(), + ), + ( + nei_idx, + b1.GetEndAtom().GetIdx(), + b2.GetEndAtom().GetIdx(), + ), + ] + att_confs.append(new_amap) if ring_bond_equal(b1, b2, reverse=True): - new_amap = amap + [(nei_idx, b1.GetBeginAtom().GetIdx(), b2.GetEndAtom().GetIdx()), - (nei_idx, b1.GetEndAtom().GetIdx(), b2.GetBeginAtom().GetIdx())] - att_confs.append( new_amap ) + new_amap = amap + [ + ( + nei_idx, + b1.GetBeginAtom().GetIdx(), + b2.GetEndAtom().GetIdx(), + ), + ( + nei_idx, + b1.GetEndAtom().GetIdx(), + b2.GetBeginAtom().GetIdx(), + ), + ] + att_confs.append(new_amap) return att_confs -#Try rings first: Speed-Up + +# Try rings first: Speed-Up def enum_assemble_nx(node, neighbors, prev_nodes=[], prev_amap=[]): all_attach_confs = [] - singletons = [nei_node['nid'] for nei_node in neighbors + prev_nodes if nei_node['mol'].GetNumAtoms() == 1] + singletons = [ + nei_node["nid"] + for nei_node in neighbors + prev_nodes + if nei_node["mol"].GetNumAtoms() == 1 + ] def search(cur_amap, depth): if len(all_attach_confs) > MAX_NCAND: @@ -266,11 
+330,13 @@ def search(cur_amap, depth): return nei_node = neighbors[depth] - cand_amap = enum_attach_nx(node['mol'], nei_node, cur_amap, singletons) + cand_amap = enum_attach_nx(node["mol"], nei_node, cur_amap, singletons) cand_smiles = set() candidates = [] for amap in cand_amap: - cand_mol = local_attach_nx(node['mol'], neighbors[:depth+1], prev_nodes, amap) + cand_mol = local_attach_nx( + node["mol"], neighbors[: depth + 1], prev_nodes, amap + ) cand_mol = sanitize(cand_mol) if cand_mol is None: continue @@ -290,47 +356,69 @@ def search(cur_amap, depth): cand_smiles = set() candidates = [] for amap in all_attach_confs: - cand_mol = local_attach_nx(node['mol'], neighbors, prev_nodes, amap) + cand_mol = local_attach_nx(node["mol"], neighbors, prev_nodes, amap) cand_mol = Chem.MolFromSmiles(Chem.MolToSmiles(cand_mol)) smiles = Chem.MolToSmiles(cand_mol) if smiles in cand_smiles: continue cand_smiles.add(smiles) Chem.Kekulize(cand_mol) - candidates.append( (smiles,cand_mol,amap) ) + candidates.append((smiles, cand_mol, amap)) return candidates -#Only used for debugging purpose -def dfs_assemble_nx(graph, cur_mol, global_amap, fa_amap, cur_node_id, fa_node_id): + +# Only used for debugging purpose +def dfs_assemble_nx( + graph, cur_mol, global_amap, fa_amap, cur_node_id, fa_node_id +): cur_node = graph.nodes_dict[cur_node_id] fa_node = graph.nodes_dict[fa_node_id] if fa_node_id is not None else None - fa_nid = fa_node['nid'] if fa_node is not None else -1 + fa_nid = fa_node["nid"] if fa_node is not None else -1 prev_nodes = [fa_node] if fa_node is not None else [] - children_id = [nei for nei in graph[cur_node_id] if graph.nodes_dict[nei]['nid'] != fa_nid] + children_id = [ + nei + for nei in graph[cur_node_id] + if graph.nodes_dict[nei]["nid"] != fa_nid + ] children = [graph.nodes_dict[nei] for nei in children_id] - neighbors = [nei for nei in children if nei['mol'].GetNumAtoms() > 1] - neighbors = sorted(neighbors, key=lambda x:x['mol'].GetNumAtoms(), reverse=True) - singletons = [nei for nei in children if nei['mol'].GetNumAtoms() == 1] + neighbors = [nei for nei in children if nei["mol"].GetNumAtoms() > 1] + neighbors = sorted( + neighbors, key=lambda x: x["mol"].GetNumAtoms(), reverse=True + ) + singletons = [nei for nei in children if nei["mol"].GetNumAtoms() == 1] neighbors = singletons + neighbors - cur_amap = [(fa_nid,a2,a1) for nid,a1,a2 in fa_amap if nid == cur_node['nid']] - cands = enum_assemble_nx(graph.nodes_dict[cur_node_id], neighbors, prev_nodes, cur_amap) + cur_amap = [ + (fa_nid, a2, a1) for nid, a1, a2 in fa_amap if nid == cur_node["nid"] + ] + cands = enum_assemble_nx( + graph.nodes_dict[cur_node_id], neighbors, prev_nodes, cur_amap + ) if len(cands) == 0: return cand_smiles, _, cand_amap = zip(*cands) - label_idx = cand_smiles.index(cur_node['label']) + label_idx = cand_smiles.index(cur_node["label"]) label_amap = cand_amap[label_idx] - for nei_id,ctr_atom,nei_atom in label_amap: + for nei_id, ctr_atom, nei_atom in label_amap: if nei_id == fa_nid: continue - global_amap[nei_id][nei_atom] = global_amap[cur_node['nid']][ctr_atom] - - cur_mol = attach_mols_nx(cur_mol, children, [], global_amap) #father is already attached + global_amap[nei_id][nei_atom] = global_amap[cur_node["nid"]][ctr_atom] + + cur_mol = attach_mols_nx( + cur_mol, children, [], global_amap + ) # father is already attached for nei_node_id, nei_node in zip(children_id, children): - if not nei_node['is_leaf']: - dfs_assemble_nx(graph, cur_mol, global_amap, label_amap, nei_node_id, cur_node_id) + if not 
nei_node["is_leaf"]: + dfs_assemble_nx( + graph, + cur_mol, + global_amap, + label_amap, + nei_node_id, + cur_node_id, + ) diff --git a/examples/pytorch/jtnn/jtnn/datautils.py b/examples/pytorch/jtnn/jtnn/datautils.py index 553b5403104c..e42c3cdd8552 100644 --- a/examples/pytorch/jtnn/jtnn/datautils.py +++ b/examples/pytorch/jtnn/jtnn/datautils.py @@ -2,41 +2,49 @@ from torch.utils.data import Dataset import dgl -from dgl.data.utils import download, extract_archive, get_download_dir, _get_dgl_url -from .mol_tree_nx import DGLMolTree -from .mol_tree import Vocab +from dgl.data.utils import ( + _get_dgl_url, + download, + extract_archive, + get_download_dir, +) -from .mpn import mol2dgl_single as mol2dgl_enc -from .jtmpn import mol2dgl_single as mol2dgl_dec from .jtmpn import ATOM_FDIM as ATOM_FDIM_DEC from .jtmpn import BOND_FDIM as BOND_FDIM_DEC +from .jtmpn import mol2dgl_single as mol2dgl_dec +from .mol_tree import Vocab +from .mol_tree_nx import DGLMolTree +from .mpn import mol2dgl_single as mol2dgl_enc + def _unpack_field(examples, field): return [e[field] for e in examples] + def _set_node_id(mol_tree, vocab): wid = [] for i, node in enumerate(mol_tree.nodes_dict): - mol_tree.nodes_dict[node]['idx'] = i - wid.append(vocab.get_index(mol_tree.nodes_dict[node]['smiles'])) + mol_tree.nodes_dict[node]["idx"] = i + wid.append(vocab.get_index(mol_tree.nodes_dict[node]["smiles"])) return wid + class JTNNDataset(Dataset): def __init__(self, data, vocab, training=True): self.dir = get_download_dir() - self.zip_file_path='{}/jtnn.zip'.format(self.dir) + self.zip_file_path = "{}/jtnn.zip".format(self.dir) - download(_get_dgl_url('dgllife/jtnn.zip'), path=self.zip_file_path) - extract_archive(self.zip_file_path, '{}/jtnn'.format(self.dir)) - print('Loading data...') - data_file = '{}/jtnn/{}.txt'.format(self.dir, data) + download(_get_dgl_url("dgllife/jtnn.zip"), path=self.zip_file_path) + extract_archive(self.zip_file_path, "{}/jtnn".format(self.dir)) + print("Loading data...") + data_file = "{}/jtnn/{}.txt".format(self.dir, data) with open(data_file) as f: self.data = [line.strip("\r\n ").split()[0] for line in f] - self.vocab_file = '{}/jtnn/{}.txt'.format(self.dir, vocab) - print('Loading finished.') - print('\tNum samples:', len(self.data)) - print('\tVocab file:', self.vocab_file) + self.vocab_file = "{}/jtnn/{}.txt".format(self.dir, vocab) + print("Loading finished.") + print("\tNum samples:", len(self.data)) + print("\tVocab file:", self.vocab_file) self.training = training self.vocab = Vocab([x.strip("\r\n ") for x in open(self.vocab_file)]) @@ -55,12 +63,12 @@ def __getitem__(self, idx): mol_graph, atom_x_enc, bond_x_enc = mol2dgl_enc(mol_tree.smiles) result = { - 'mol_tree': mol_tree, - 'mol_graph': mol_graph, - 'atom_x_enc': atom_x_enc, - 'bond_x_enc': bond_x_enc, - 'wid': wid, - } + "mol_tree": mol_tree, + "mol_graph": mol_graph, + "atom_x_enc": atom_x_enc, + "bond_x_enc": bond_x_enc, + "wid": wid, + } if not self.training: return result @@ -69,17 +77,24 @@ def __getitem__(self, idx): cands = [] for node_id, node in mol_tree.nodes_dict.items(): # fill in ground truth - if node['label'] not in node['cands']: - node['cands'].append(node['label']) - node['cand_mols'].append(node['label_mol']) + if node["label"] not in node["cands"]: + node["cands"].append(node["label"]) + node["cand_mols"].append(node["label_mol"]) - if node['is_leaf'] or len(node['cands']) == 1: + if node["is_leaf"] or len(node["cands"]) == 1: continue - cands.extend([(cand, mol_tree, node_id) - for cand in 
node['cand_mols']]) + cands.extend( + [(cand, mol_tree, node_id) for cand in node["cand_mols"]] + ) if len(cands) > 0: - cand_graphs, atom_x_dec, bond_x_dec, tree_mess_src_e, \ - tree_mess_tgt_e, tree_mess_tgt_n = mol2dgl_dec(cands) + ( + cand_graphs, + atom_x_dec, + bond_x_dec, + tree_mess_src_e, + tree_mess_tgt_e, + tree_mess_tgt_n, + ) = mol2dgl_dec(cands) else: cand_graphs = [] atom_x_dec = torch.zeros(0, ATOM_FDIM_DEC) @@ -95,8 +110,9 @@ def __getitem__(self, idx): cands.append(mol_tree.smiles3D) stereo_graphs = [mol2dgl_enc(c) for c in cands] - stereo_cand_graphs, stereo_atom_x_enc, stereo_bond_x_enc = \ - zip(*stereo_graphs) + stereo_cand_graphs, stereo_atom_x_enc, stereo_bond_x_enc = zip( + *stereo_graphs + ) stereo_atom_x_enc = torch.cat(stereo_atom_x_enc) stereo_bond_x_enc = torch.cat(stereo_bond_x_enc) stereo_cand_label = [(cands.index(mol_tree.smiles3D), len(cands))] @@ -106,21 +122,24 @@ def __getitem__(self, idx): stereo_bond_x_enc = torch.zeros(0, bond_x_enc.shape[1]) stereo_cand_label = [] - result.update({ - 'cand_graphs': cand_graphs, - 'atom_x_dec': atom_x_dec, - 'bond_x_dec': bond_x_dec, - 'tree_mess_src_e': tree_mess_src_e, - 'tree_mess_tgt_e': tree_mess_tgt_e, - 'tree_mess_tgt_n': tree_mess_tgt_n, - 'stereo_cand_graphs': stereo_cand_graphs, - 'stereo_atom_x_enc': stereo_atom_x_enc, - 'stereo_bond_x_enc': stereo_bond_x_enc, - 'stereo_cand_label': stereo_cand_label, - }) + result.update( + { + "cand_graphs": cand_graphs, + "atom_x_dec": atom_x_dec, + "bond_x_dec": bond_x_dec, + "tree_mess_src_e": tree_mess_src_e, + "tree_mess_tgt_e": tree_mess_tgt_e, + "tree_mess_tgt_n": tree_mess_tgt_n, + "stereo_cand_graphs": stereo_cand_graphs, + "stereo_atom_x_enc": stereo_atom_x_enc, + "stereo_bond_x_enc": stereo_bond_x_enc, + "stereo_cand_label": stereo_cand_label, + } + ) return result + class JTNNCollator(object): def __init__(self, vocab, training): self.vocab = vocab @@ -131,43 +150,45 @@ def _batch_and_set(graphs, atom_x, bond_x, flatten): if flatten: graphs = [g for f in graphs for g in f] graph_batch = dgl.batch(graphs) - graph_batch.ndata['x'] = atom_x - graph_batch.edata.update({ - 'x': bond_x, - 'src_x': atom_x.new(bond_x.shape[0], atom_x.shape[1]).zero_(), - }) + graph_batch.ndata["x"] = atom_x + graph_batch.edata.update( + { + "x": bond_x, + "src_x": atom_x.new(bond_x.shape[0], atom_x.shape[1]).zero_(), + } + ) return graph_batch def __call__(self, examples): # get list of trees - mol_trees = _unpack_field(examples, 'mol_tree') - wid = _unpack_field(examples, 'wid') + mol_trees = _unpack_field(examples, "mol_tree") + wid = _unpack_field(examples, "wid") for _wid, mol_tree in zip(wid, mol_trees): - mol_tree.graph.ndata['wid'] = torch.LongTensor(_wid) + mol_tree.graph.ndata["wid"] = torch.LongTensor(_wid) # TODO: either support pickling or get around ctypes pointers using scipy # batch molecule graphs - mol_graphs = _unpack_field(examples, 'mol_graph') - atom_x = torch.cat(_unpack_field(examples, 'atom_x_enc')) - bond_x = torch.cat(_unpack_field(examples, 'bond_x_enc')) + mol_graphs = _unpack_field(examples, "mol_graph") + atom_x = torch.cat(_unpack_field(examples, "atom_x_enc")) + bond_x = torch.cat(_unpack_field(examples, "bond_x_enc")) mol_graph_batch = self._batch_and_set(mol_graphs, atom_x, bond_x, False) result = { - 'mol_trees': mol_trees, - 'mol_graph_batch': mol_graph_batch, - } + "mol_trees": mol_trees, + "mol_graph_batch": mol_graph_batch, + } if not self.training: return result # batch candidate graphs - cand_graphs = _unpack_field(examples, 'cand_graphs') + 
cand_graphs = _unpack_field(examples, "cand_graphs") cand_batch_idx = [] - atom_x = torch.cat(_unpack_field(examples, 'atom_x_dec')) - bond_x = torch.cat(_unpack_field(examples, 'bond_x_dec')) - tree_mess_src_e = _unpack_field(examples, 'tree_mess_src_e') - tree_mess_tgt_e = _unpack_field(examples, 'tree_mess_tgt_e') - tree_mess_tgt_n = _unpack_field(examples, 'tree_mess_tgt_n') + atom_x = torch.cat(_unpack_field(examples, "atom_x_dec")) + bond_x = torch.cat(_unpack_field(examples, "bond_x_dec")) + tree_mess_src_e = _unpack_field(examples, "tree_mess_src_e") + tree_mess_tgt_e = _unpack_field(examples, "tree_mess_tgt_e") + tree_mess_tgt_n = _unpack_field(examples, "tree_mess_tgt_n") n_graph_nodes = 0 n_tree_nodes = 0 @@ -182,41 +203,46 @@ def __call__(self, examples): tree_mess_src_e = torch.cat(tree_mess_src_e) tree_mess_tgt_n = torch.cat(tree_mess_tgt_n) - cand_graph_batch = self._batch_and_set(cand_graphs, atom_x, bond_x, True) + cand_graph_batch = self._batch_and_set( + cand_graphs, atom_x, bond_x, True + ) # batch stereoisomers - stereo_cand_graphs = _unpack_field(examples, 'stereo_cand_graphs') - atom_x = torch.cat(_unpack_field(examples, 'stereo_atom_x_enc')) - bond_x = torch.cat(_unpack_field(examples, 'stereo_bond_x_enc')) + stereo_cand_graphs = _unpack_field(examples, "stereo_cand_graphs") + atom_x = torch.cat(_unpack_field(examples, "stereo_atom_x_enc")) + bond_x = torch.cat(_unpack_field(examples, "stereo_bond_x_enc")) stereo_cand_batch_idx = [] for i in range(len(stereo_cand_graphs)): stereo_cand_batch_idx.extend([i] * len(stereo_cand_graphs[i])) if len(stereo_cand_batch_idx) > 0: stereo_cand_labels = [ - (label, length) - for ex in _unpack_field(examples, 'stereo_cand_label') - for label, length in ex - ] + (label, length) + for ex in _unpack_field(examples, "stereo_cand_label") + for label, length in ex + ] stereo_cand_labels, stereo_cand_lengths = zip(*stereo_cand_labels) stereo_cand_graph_batch = self._batch_and_set( - stereo_cand_graphs, atom_x, bond_x, True) + stereo_cand_graphs, atom_x, bond_x, True + ) else: stereo_cand_labels = [] stereo_cand_lengths = [] stereo_cand_graph_batch = None stereo_cand_batch_idx = [] - result.update({ - 'cand_graph_batch': cand_graph_batch, - 'cand_batch_idx': cand_batch_idx, - 'tree_mess_tgt_e': tree_mess_tgt_e, - 'tree_mess_src_e': tree_mess_src_e, - 'tree_mess_tgt_n': tree_mess_tgt_n, - 'stereo_cand_graph_batch': stereo_cand_graph_batch, - 'stereo_cand_batch_idx': stereo_cand_batch_idx, - 'stereo_cand_labels': stereo_cand_labels, - 'stereo_cand_lengths': stereo_cand_lengths, - }) + result.update( + { + "cand_graph_batch": cand_graph_batch, + "cand_batch_idx": cand_batch_idx, + "tree_mess_tgt_e": tree_mess_tgt_e, + "tree_mess_src_e": tree_mess_src_e, + "tree_mess_tgt_n": tree_mess_tgt_n, + "stereo_cand_graph_batch": stereo_cand_graph_batch, + "stereo_cand_batch_idx": stereo_cand_batch_idx, + "stereo_cand_labels": stereo_cand_labels, + "stereo_cand_lengths": stereo_cand_lengths, + } + ) return result diff --git a/examples/pytorch/jtnn/jtnn/jtnn_enc.py b/examples/pytorch/jtnn/jtnn/jtnn_enc.py index 9d86b9019666..70c26d59dac8 100644 --- a/examples/pytorch/jtnn/jtnn/jtnn_enc.py +++ b/examples/pytorch/jtnn/jtnn/jtnn_enc.py @@ -1,12 +1,15 @@ +import numpy as np import torch import torch.nn as nn -from .nnutils import GRUUpdate, cuda, tocpu -from dgl import batch, bfs_edges_generator, line_graph + import dgl.function as DGLF -import numpy as np +from dgl import batch, bfs_edges_generator, line_graph + +from .nnutils import GRUUpdate, cuda, tocpu 
MAX_NB = 8 + def level_order(forest, roots): forest = tocpu(forest) edges = bfs_edges_generator(forest, roots) @@ -18,6 +21,7 @@ def level_order(forest, roots): yield from reversed(edges_back) yield from edges + class EncoderGatherUpdate(nn.Module): def __init__(self, hidden_size): nn.Module.__init__(self) @@ -26,10 +30,10 @@ def __init__(self, hidden_size): self.W = nn.Linear(2 * hidden_size, hidden_size) def forward(self, nodes): - x = nodes.data['x'] - m = nodes.data['m'] + x = nodes.data["x"] + m = nodes.data["m"] return { - 'h': torch.relu(self.W(torch.cat([x, m], 1))), + "h": torch.relu(self.W(torch.cat([x, m], 1))), } @@ -39,7 +43,7 @@ def __init__(self, vocab, hidden_size, embedding=None): self.hidden_size = hidden_size self.vocab_size = vocab.size() self.vocab = vocab - + if embedding is None: self.embedding = nn.Embedding(self.vocab_size, hidden_size) else: @@ -50,44 +54,55 @@ def __init__(self, vocab, hidden_size, embedding=None): def forward(self, mol_trees): mol_tree_batch = batch(mol_trees) - + # Build line graph to prepare for belief propagation - mol_tree_batch_lg = line_graph(mol_tree_batch, backtracking=False, shared=True) + mol_tree_batch_lg = line_graph( + mol_tree_batch, backtracking=False, shared=True + ) return self.run(mol_tree_batch, mol_tree_batch_lg) def run(self, mol_tree_batch, mol_tree_batch_lg): # Since tree roots are designated to 0. In the batched graph we can # simply find the corresponding node ID by looking at node_offset - node_offset = np.cumsum(np.insert(mol_tree_batch.batch_num_nodes().cpu().numpy(), 0, 0)) + node_offset = np.cumsum( + np.insert(mol_tree_batch.batch_num_nodes().cpu().numpy(), 0, 0) + ) root_ids = node_offset[:-1] n_nodes = mol_tree_batch.number_of_nodes() n_edges = mol_tree_batch.number_of_edges() # Assign structure embeddings to tree nodes - mol_tree_batch.ndata.update({ - 'x': self.embedding(mol_tree_batch.ndata['wid']), - 'm': cuda(torch.zeros(n_nodes, self.hidden_size)), - 'h': cuda(torch.zeros(n_nodes, self.hidden_size)), - }) + mol_tree_batch.ndata.update( + { + "x": self.embedding(mol_tree_batch.ndata["wid"]), + "m": cuda(torch.zeros(n_nodes, self.hidden_size)), + "h": cuda(torch.zeros(n_nodes, self.hidden_size)), + } + ) # Initialize the intermediate variables according to Eq (4)-(8). # Also initialize the src_x and dst_x fields. # TODO: context? 
- mol_tree_batch.edata.update({ - 's': cuda(torch.zeros(n_edges, self.hidden_size)), - 'm': cuda(torch.zeros(n_edges, self.hidden_size)), - 'r': cuda(torch.zeros(n_edges, self.hidden_size)), - 'z': cuda(torch.zeros(n_edges, self.hidden_size)), - 'src_x': cuda(torch.zeros(n_edges, self.hidden_size)), - 'dst_x': cuda(torch.zeros(n_edges, self.hidden_size)), - 'rm': cuda(torch.zeros(n_edges, self.hidden_size)), - 'accum_rm': cuda(torch.zeros(n_edges, self.hidden_size)), - }) + mol_tree_batch.edata.update( + { + "s": cuda(torch.zeros(n_edges, self.hidden_size)), + "m": cuda(torch.zeros(n_edges, self.hidden_size)), + "r": cuda(torch.zeros(n_edges, self.hidden_size)), + "z": cuda(torch.zeros(n_edges, self.hidden_size)), + "src_x": cuda(torch.zeros(n_edges, self.hidden_size)), + "dst_x": cuda(torch.zeros(n_edges, self.hidden_size)), + "rm": cuda(torch.zeros(n_edges, self.hidden_size)), + "accum_rm": cuda(torch.zeros(n_edges, self.hidden_size)), + } + ) # Send the source/destination node features to edges mol_tree_batch.apply_edges( - func=lambda edges: {'src_x': edges.src['x'], 'dst_x': edges.dst['x']}, + func=lambda edges: { + "src_x": edges.src["x"], + "dst_x": edges.dst["x"], + }, ) # Message passing @@ -98,15 +113,19 @@ def run(self, mol_tree_batch, mol_tree_batch_lg): mol_tree_batch_lg.ndata.update(mol_tree_batch.edata) for eid in level_order(mol_tree_batch, root_ids): eid = eid.to(mol_tree_batch_lg.device) - mol_tree_batch_lg.pull(eid, DGLF.copy_u('m', 'm'), DGLF.sum('m', 's')) - mol_tree_batch_lg.pull(eid, DGLF.copy_u('rm', 'rm'), DGLF.sum('rm', 'accum_rm')) + mol_tree_batch_lg.pull( + eid, DGLF.copy_u("m", "m"), DGLF.sum("m", "s") + ) + mol_tree_batch_lg.pull( + eid, DGLF.copy_u("rm", "rm"), DGLF.sum("rm", "accum_rm") + ) mol_tree_batch_lg.apply_nodes(self.enc_tree_update, v=eid) # Readout mol_tree_batch.edata.update(mol_tree_batch_lg.ndata) - mol_tree_batch.update_all(DGLF.copy_e('m', 'm'), DGLF.sum('m', 'm')) + mol_tree_batch.update_all(DGLF.copy_e("m", "m"), DGLF.sum("m", "m")) mol_tree_batch.apply_nodes(self.enc_tree_gather_update) - root_vecs = mol_tree_batch.nodes[root_ids].data['h'] + root_vecs = mol_tree_batch.nodes[root_ids].data["h"] return mol_tree_batch, root_vecs diff --git a/examples/pytorch/jtnn/jtnn/jtnn_vae.py b/examples/pytorch/jtnn/jtnn/jtnn_vae.py index 572bc4d0cf49..79b63bd639b1 100644 --- a/examples/pytorch/jtnn/jtnn/jtnn_vae.py +++ b/examples/pytorch/jtnn/jtnn/jtnn_vae.py @@ -1,23 +1,29 @@ +import copy + +import rdkit.Chem as Chem import torch import torch.nn as nn import torch.nn.functional as F -from .nnutils import cuda -from .chemutils import set_atommap, copy_edit_mol, enum_assemble_nx, \ - attach_mols_nx, decode_stereo -from .jtnn_enc import DGLJTNNEncoder + +from dgl import batch, unbatch + +from .chemutils import ( + attach_mols_nx, + copy_edit_mol, + decode_stereo, + enum_assemble_nx, + set_atommap, +) +from .jtmpn import DGLJTMPN +from .jtmpn import mol2dgl_single as mol2dgl_dec from .jtnn_dec import DGLJTNNDecoder +from .jtnn_enc import DGLJTNNEncoder from .mpn import DGLMPN from .mpn import mol2dgl_single as mol2dgl_enc -from .jtmpn import DGLJTMPN -from .jtmpn import mol2dgl_single as mol2dgl_dec - -import rdkit.Chem as Chem -import copy +from .nnutils import cuda -from dgl import batch, unbatch class DGLJTNNVAE(nn.Module): - def __init__(self, vocab, hidden_size, latent_size, depth): super(DGLJTNNVAE, self).__init__() self.vocab = vocab @@ -29,7 +35,8 @@ def __init__(self, vocab, hidden_size, latent_size, depth): self.mpn = DGLMPN(hidden_size, depth) 
self.jtnn = DGLJTNNEncoder(vocab, hidden_size, self.embedding) self.decoder = DGLJTNNDecoder( - vocab, hidden_size, latent_size // 2, self.embedding) + vocab, hidden_size, latent_size // 2, self.embedding + ) self.jtmpn = DGLJTMPN(hidden_size, depth) self.T_mean = nn.Linear(hidden_size, latent_size // 2) @@ -44,24 +51,32 @@ def __init__(self, vocab, hidden_size, latent_size, depth): @staticmethod def move_to_cuda(mol_batch): - for i in range(len(mol_batch['mol_trees'])): - mol_batch['mol_trees'][i].graph = cuda(mol_batch['mol_trees'][i].graph) - - mol_batch['mol_graph_batch'] = cuda(mol_batch['mol_graph_batch']) - if 'cand_graph_batch' in mol_batch: - mol_batch['cand_graph_batch'] = cuda(mol_batch['cand_graph_batch']) - if mol_batch.get('stereo_cand_graph_batch') is not None: - mol_batch['stereo_cand_graph_batch'] = cuda(mol_batch['stereo_cand_graph_batch']) + for i in range(len(mol_batch["mol_trees"])): + mol_batch["mol_trees"][i].graph = cuda( + mol_batch["mol_trees"][i].graph + ) + + mol_batch["mol_graph_batch"] = cuda(mol_batch["mol_graph_batch"]) + if "cand_graph_batch" in mol_batch: + mol_batch["cand_graph_batch"] = cuda(mol_batch["cand_graph_batch"]) + if mol_batch.get("stereo_cand_graph_batch") is not None: + mol_batch["stereo_cand_graph_batch"] = cuda( + mol_batch["stereo_cand_graph_batch"] + ) def encode(self, mol_batch): - mol_graphs = mol_batch['mol_graph_batch'] + mol_graphs = mol_batch["mol_graph_batch"] mol_vec = self.mpn(mol_graphs) - mol_tree_batch, tree_vec = self.jtnn([t.graph for t in mol_batch['mol_trees']]) + mol_tree_batch, tree_vec = self.jtnn( + [t.graph for t in mol_batch["mol_trees"]] + ) self.n_nodes_total += mol_graphs.number_of_nodes() self.n_edges_total += mol_graphs.number_of_edges() - self.n_tree_nodes_total += sum(t.graph.number_of_nodes() for t in mol_batch['mol_trees']) + self.n_tree_nodes_total += sum( + t.graph.number_of_nodes() for t in mol_batch["mol_trees"] + ) self.n_passes += 1 return mol_tree_batch, tree_vec, mol_vec @@ -85,31 +100,45 @@ def sample(self, tree_vec, mol_vec, e1=None, e2=None): def forward(self, mol_batch, beta=0, e1=None, e2=None): self.move_to_cuda(mol_batch) - mol_trees = mol_batch['mol_trees'] + mol_trees = mol_batch["mol_trees"] batch_size = len(mol_trees) mol_tree_batch, tree_vec, mol_vec = self.encode(mol_batch) - tree_vec, mol_vec, z_mean, z_log_var = self.sample(tree_vec, mol_vec, e1, e2) - kl_loss = -0.5 * torch.sum(1.0 + z_log_var - z_mean * z_mean - torch.exp(z_log_var)) / batch_size - - word_loss, topo_loss, word_acc, topo_acc = self.decoder([t.graph for t in mol_trees], tree_vec) + tree_vec, mol_vec, z_mean, z_log_var = self.sample( + tree_vec, mol_vec, e1, e2 + ) + kl_loss = ( + -0.5 + * torch.sum( + 1.0 + z_log_var - z_mean * z_mean - torch.exp(z_log_var) + ) + / batch_size + ) + + word_loss, topo_loss, word_acc, topo_acc = self.decoder( + [t.graph for t in mol_trees], tree_vec + ) assm_loss, assm_acc = self.assm(mol_batch, mol_tree_batch, mol_vec) stereo_loss, stereo_acc = self.stereo(mol_batch, mol_vec) - loss = word_loss + topo_loss + assm_loss + 2 * stereo_loss + beta * kl_loss + loss = ( + word_loss + topo_loss + assm_loss + 2 * stereo_loss + beta * kl_loss + ) return loss, kl_loss, word_acc, topo_acc, assm_acc, stereo_acc def assm(self, mol_batch, mol_tree_batch, mol_vec): - cands = [mol_batch['cand_graph_batch'], - cuda(mol_batch['tree_mess_src_e']), - cuda(mol_batch['tree_mess_tgt_e']), - cuda(mol_batch['tree_mess_tgt_n'])] + cands = [ + mol_batch["cand_graph_batch"], + cuda(mol_batch["tree_mess_src_e"]), + 
cuda(mol_batch["tree_mess_tgt_e"]), + cuda(mol_batch["tree_mess_tgt_n"]), + ] cand_vec = self.jtmpn(cands, mol_tree_batch) cand_vec = self.G_mean(cand_vec) - batch_idx = cuda(torch.LongTensor(mol_batch['cand_batch_idx'])) + batch_idx = cuda(torch.LongTensor(mol_batch["cand_batch_idx"])) mol_vec = mol_vec[batch_idx] mol_vec = mol_vec.view(-1, 1, self.latent_size // 2) @@ -118,16 +147,19 @@ def assm(self, mol_batch, mol_tree_batch, mol_vec): cnt, tot, acc = 0, 0, 0 all_loss = [] - for i, mol_tree in enumerate(mol_batch['mol_trees']): - comp_nodes = [node_id for node_id, node in mol_tree.nodes_dict.items() - if len(node['cands']) > 1 and not node['is_leaf']] + for i, mol_tree in enumerate(mol_batch["mol_trees"]): + comp_nodes = [ + node_id + for node_id, node in mol_tree.nodes_dict.items() + if len(node["cands"]) > 1 and not node["is_leaf"] + ] cnt += len(comp_nodes) # segmented accuracy and cross entropy for node_id in comp_nodes: node = mol_tree.nodes_dict[node_id] - label = node['cands'].index(node['label']) - ncand = len(node['cands']) - cur_score = scores[tot:tot+ncand] + label = node["cands"].index(node["label"]) + ncand = len(node["cands"]) + cur_score = scores[tot : tot + ncand] tot += ncand if cur_score[label].item() >= cur_score.max().item(): @@ -135,20 +167,23 @@ def assm(self, mol_batch, mol_tree_batch, mol_vec): label = cuda(torch.LongTensor([label])) all_loss.append( - F.cross_entropy(cur_score.view(1, -1), label, size_average=False)) + F.cross_entropy( + cur_score.view(1, -1), label, size_average=False + ) + ) - all_loss = sum(all_loss) / len(mol_batch['mol_trees']) + all_loss = sum(all_loss) / len(mol_batch["mol_trees"]) return all_loss, acc / cnt def stereo(self, mol_batch, mol_vec): - stereo_cands = mol_batch['stereo_cand_graph_batch'] - batch_idx = mol_batch['stereo_cand_batch_idx'] - labels = mol_batch['stereo_cand_labels'] - lengths = mol_batch['stereo_cand_lengths'] + stereo_cands = mol_batch["stereo_cand_graph_batch"] + batch_idx = mol_batch["stereo_cand_batch_idx"] + labels = mol_batch["stereo_cand_labels"] + lengths = mol_batch["stereo_cand_lengths"] if len(labels) == 0: # Only one stereoisomer exists; do nothing - return cuda(torch.tensor(0.)), 1. 
+ return cuda(torch.tensor(0.0)), 1.0 batch_idx = cuda(torch.LongTensor(batch_idx)) stereo_cands = self.mpn(stereo_cands) @@ -159,12 +194,15 @@ def stereo(self, mol_batch, mol_vec): st, acc = 0, 0 all_loss = [] for label, le in zip(labels, lengths): - cur_scores = scores[st:st+le] + cur_scores = scores[st : st + le] if cur_scores.data[label].item() >= cur_scores.max().item(): acc += 1 label = cuda(torch.LongTensor([label])) all_loss.append( - F.cross_entropy(cur_scores.view(1, -1), label, size_average=False)) + F.cross_entropy( + cur_scores.view(1, -1), label, size_average=False + ) + ) st += le all_loss = sum(all_loss) / len(labels) @@ -175,24 +213,32 @@ def decode(self, tree_vec, mol_vec): effective_nodes_list = effective_nodes.tolist() nodes_dict = [nodes_dict[v] for v in effective_nodes_list] - for i, (node_id, node) in enumerate(zip(effective_nodes_list, nodes_dict)): - node['idx'] = i - node['nid'] = i + 1 - node['is_leaf'] = True + for i, (node_id, node) in enumerate( + zip(effective_nodes_list, nodes_dict) + ): + node["idx"] = i + node["nid"] = i + 1 + node["is_leaf"] = True if mol_tree.graph.in_degrees(node_id) > 1: - node['is_leaf'] = False - set_atommap(node['mol'], node['nid']) + node["is_leaf"] = False + set_atommap(node["mol"], node["nid"]) - mol_tree_sg = mol_tree.graph.subgraph(effective_nodes.to(tree_vec.device)) + mol_tree_sg = mol_tree.graph.subgraph( + effective_nodes.to(tree_vec.device) + ) mol_tree_msg, _ = self.jtnn([mol_tree_sg]) mol_tree_msg = unbatch(mol_tree_msg)[0] mol_tree_msg.nodes_dict = nodes_dict - cur_mol = copy_edit_mol(nodes_dict[0]['mol']) + cur_mol = copy_edit_mol(nodes_dict[0]["mol"]) global_amap = [{}] + [{} for node in nodes_dict] - global_amap[1] = {atom.GetIdx(): atom.GetIdx() for atom in cur_mol.GetAtoms()} + global_amap[1] = { + atom.GetIdx(): atom.GetIdx() for atom in cur_mol.GetAtoms() + } - cur_mol = self.dfs_assemble(mol_tree_msg, mol_vec, cur_mol, global_amap, [], 0, None) + cur_mol = self.dfs_assemble( + mol_tree_msg, mol_vec, cur_mol, global_amap, [], 0, None + ) if cur_mol is None: return None @@ -207,58 +253,88 @@ def decode(self, tree_vec, mol_vec): if len(stereo_cands) == 1: return stereo_cands[0] stereo_graphs = [mol2dgl_enc(c) for c in stereo_cands] - stereo_cand_graphs, atom_x, bond_x = \ - zip(*stereo_graphs) + stereo_cand_graphs, atom_x, bond_x = zip(*stereo_graphs) stereo_cand_graphs = cuda(batch(stereo_cand_graphs)) atom_x = cuda(torch.cat(atom_x)) bond_x = cuda(torch.cat(bond_x)) - stereo_cand_graphs.ndata['x'] = atom_x - stereo_cand_graphs.edata['x'] = bond_x - stereo_cand_graphs.edata['src_x'] = atom_x.new( - bond_x.shape[0], atom_x.shape[1]).zero_() + stereo_cand_graphs.ndata["x"] = atom_x + stereo_cand_graphs.edata["x"] = bond_x + stereo_cand_graphs.edata["src_x"] = atom_x.new( + bond_x.shape[0], atom_x.shape[1] + ).zero_() stereo_vecs = self.mpn(stereo_cand_graphs) stereo_vecs = self.G_mean(stereo_vecs) scores = F.cosine_similarity(stereo_vecs, mol_vec) _, max_id = scores.max(0) return stereo_cands[max_id.item()] - def dfs_assemble(self, mol_tree_msg, mol_vec, cur_mol, - global_amap, fa_amap, cur_node_id, fa_node_id): + def dfs_assemble( + self, + mol_tree_msg, + mol_vec, + cur_mol, + global_amap, + fa_amap, + cur_node_id, + fa_node_id, + ): nodes_dict = mol_tree_msg.nodes_dict fa_node = nodes_dict[fa_node_id] if fa_node_id is not None else None cur_node = nodes_dict[cur_node_id] - fa_nid = fa_node['nid'] if fa_node is not None else -1 + fa_nid = fa_node["nid"] if fa_node is not None else -1 prev_nodes = [fa_node] if 
fa_node is not None else [] - children_node_id = [v for v in mol_tree_msg.successors(cur_node_id).tolist() - if nodes_dict[v]['nid'] != fa_nid] + children_node_id = [ + v + for v in mol_tree_msg.successors(cur_node_id).tolist() + if nodes_dict[v]["nid"] != fa_nid + ] children = [nodes_dict[v] for v in children_node_id] - neighbors = [nei for nei in children if nei['mol'].GetNumAtoms() > 1] - neighbors = sorted(neighbors, key=lambda x: x['mol'].GetNumAtoms(), reverse=True) - singletons = [nei for nei in children if nei['mol'].GetNumAtoms() == 1] + neighbors = [nei for nei in children if nei["mol"].GetNumAtoms() > 1] + neighbors = sorted( + neighbors, key=lambda x: x["mol"].GetNumAtoms(), reverse=True + ) + singletons = [nei for nei in children if nei["mol"].GetNumAtoms() == 1] neighbors = singletons + neighbors - cur_amap = [(fa_nid, a2, a1) for nid, a1, a2 in fa_amap if nid == cur_node['nid']] + cur_amap = [ + (fa_nid, a2, a1) + for nid, a1, a2 in fa_amap + if nid == cur_node["nid"] + ] cands = enum_assemble_nx(cur_node, neighbors, prev_nodes, cur_amap) if len(cands) == 0: return None cand_smiles, cand_mols, cand_amap = list(zip(*cands)) cands = [(candmol, mol_tree_msg, cur_node_id) for candmol in cand_mols] - cand_graphs, atom_x, bond_x, tree_mess_src_edges, \ - tree_mess_tgt_edges, tree_mess_tgt_nodes = mol2dgl_dec(cands) + ( + cand_graphs, + atom_x, + bond_x, + tree_mess_src_edges, + tree_mess_tgt_edges, + tree_mess_tgt_nodes, + ) = mol2dgl_dec(cands) cand_graphs = batch([g.to(mol_vec.device) for g in cand_graphs]) atom_x = cuda(atom_x) bond_x = cuda(bond_x) - cand_graphs.ndata['x'] = atom_x - cand_graphs.edata['x'] = bond_x - cand_graphs.edata['src_x'] = atom_x.new(bond_x.shape[0], atom_x.shape[1]).zero_() + cand_graphs.ndata["x"] = atom_x + cand_graphs.edata["x"] = bond_x + cand_graphs.edata["src_x"] = atom_x.new( + bond_x.shape[0], atom_x.shape[1] + ).zero_() cand_vecs = self.jtmpn( - (cand_graphs, tree_mess_src_edges, tree_mess_tgt_edges, tree_mess_tgt_nodes), - mol_tree_msg, - ) + ( + cand_graphs, + tree_mess_src_edges, + tree_mess_tgt_edges, + tree_mess_tgt_nodes, + ), + mol_tree_msg, + ) cand_vecs = self.G_mean(cand_vecs) mol_vec = mol_vec.squeeze() scores = cand_vecs @ mol_vec @@ -274,7 +350,9 @@ def dfs_assemble(self, mol_tree_msg, mol_vec, cur_mol, for nei_id, ctr_atom, nei_atom in pred_amap: if nei_id == fa_nid: continue - new_global_amap[nei_id][nei_atom] = new_global_amap[cur_node['nid']][ctr_atom] + new_global_amap[nei_id][nei_atom] = new_global_amap[ + cur_node["nid"] + ][ctr_atom] cur_mol = attach_mols_nx(cur_mol, children, [], new_global_amap) new_mol = cur_mol.GetMol() @@ -285,11 +363,17 @@ def dfs_assemble(self, mol_tree_msg, mol_vec, cur_mol, result = True for nei_node_id, nei_node in zip(children_node_id, children): - if nei_node['is_leaf']: + if nei_node["is_leaf"]: continue cur_mol = self.dfs_assemble( - mol_tree_msg, mol_vec, cur_mol, new_global_amap, pred_amap, - nei_node_id, cur_node_id) + mol_tree_msg, + mol_vec, + cur_mol, + new_global_amap, + pred_amap, + nei_node_id, + cur_node_id, + ) if cur_mol is None: result = False break diff --git a/examples/pytorch/jtnn/jtnn/line_profiler_integration.py b/examples/pytorch/jtnn/jtnn/line_profiler_integration.py index e7ddf0a82ae8..a10a74bc7165 100644 --- a/examples/pytorch/jtnn/jtnn/line_profiler_integration.py +++ b/examples/pytorch/jtnn/jtnn/line_profiler_integration.py @@ -1,19 +1,23 @@ -''' +""" line_profiler integration -''' +""" import os -if os.getenv('PROFILE', 0): - import line_profiler +if 
os.getenv("PROFILE", 0): import atexit + + import line_profiler + profile = line_profiler.LineProfiler() - profile_output = os.getenv('PROFILE_OUTPUT', None) + profile_output = os.getenv("PROFILE_OUTPUT", None) if profile_output: from functools import partial + atexit.register(partial(profile.dump_stats, profile_output)) else: atexit.register(profile.print_stats) else: + def profile(f): return f diff --git a/examples/pytorch/jtnn/jtnn/mol_tree.py b/examples/pytorch/jtnn/jtnn/mol_tree.py index 641b44a0875e..551431ee82b0 100644 --- a/examples/pytorch/jtnn/jtnn/mol_tree.py +++ b/examples/pytorch/jtnn/jtnn/mol_tree.py @@ -1,17 +1,22 @@ -import rdkit.Chem as Chem import copy +import rdkit.Chem as Chem + + def get_slots(smiles): mol = Chem.MolFromSmiles(smiles) - return [(atom.GetSymbol(), atom.GetFormalCharge(), atom.GetTotalNumHs()) for atom in mol.GetAtoms()] + return [ + (atom.GetSymbol(), atom.GetFormalCharge(), atom.GetTotalNumHs()) + for atom in mol.GetAtoms() + ] -class Vocab(object): +class Vocab(object): def __init__(self, smiles_list): self.vocab = smiles_list - self.vmap = {x:i for i,x in enumerate(self.vocab)} + self.vmap = {x: i for i, x in enumerate(self.vocab)} self.slots = [get_slots(smiles) for smiles in self.vocab] - + def get_index(self, smiles): return self.vmap[smiles] diff --git a/examples/pytorch/jtnn/jtnn/mol_tree_nx.py b/examples/pytorch/jtnn/jtnn/mol_tree_nx.py index 7cca5de43973..455d7686772e 100644 --- a/examples/pytorch/jtnn/jtnn/mol_tree_nx.py +++ b/examples/pytorch/jtnn/jtnn/mol_tree_nx.py @@ -1,8 +1,18 @@ -import dgl -import rdkit.Chem as Chem -from .chemutils import get_clique_mol, tree_decomp, get_mol, get_smiles, \ - set_atommap, enum_assemble_nx, decode_stereo import numpy as np +import rdkit.Chem as Chem + +import dgl + +from .chemutils import ( + decode_stereo, + enum_assemble_nx, + get_clique_mol, + get_mol, + get_smiles, + set_atommap, + tree_decomp, +) + class DGLMolTree(object): def __init__(self, smiles): @@ -28,21 +38,23 @@ def __init__(self, smiles): cmol = get_clique_mol(self.mol, c) csmiles = get_smiles(cmol) self.nodes_dict[i] = dict( - smiles=csmiles, - mol=get_mol(csmiles), - clique=c, - ) + smiles=csmiles, + mol=get_mol(csmiles), + clique=c, + ) if min(c) == 0: root = i # The clique with atom ID 0 becomes root if root > 0: for attr in self.nodes_dict[0]: - self.nodes_dict[0][attr], self.nodes_dict[root][attr] = \ - self.nodes_dict[root][attr], self.nodes_dict[0][attr] + self.nodes_dict[0][attr], self.nodes_dict[root][attr] = ( + self.nodes_dict[root][attr], + self.nodes_dict[0][attr], + ) - src = np.zeros((len(edges) * 2,), dtype='int') - dst = np.zeros((len(edges) * 2,), dtype='int') + src = np.zeros((len(edges) * 2,), dtype="int") + dst = np.zeros((len(edges) * 2,), dtype="int") for i, (_x, _y) in enumerate(edges): x = 0 if _x == root else root if _x == 0 else _x y = 0 if _y == root else root if _y == 0 else _y @@ -53,10 +65,12 @@ def __init__(self, smiles): self.graph = dgl.graph((src, dst), num_nodes=len(cliques)) for i in self.nodes_dict: - self.nodes_dict[i]['nid'] = i + 1 - if self.graph.out_degrees(i) > 1: # Leaf node mol is not marked - set_atommap(self.nodes_dict[i]['mol'], self.nodes_dict[i]['nid']) - self.nodes_dict[i]['is_leaf'] = (self.graph.out_degrees(i) == 1) + self.nodes_dict[i]["nid"] = i + 1 + if self.graph.out_degrees(i) > 1: # Leaf node mol is not marked + set_atommap( + self.nodes_dict[i]["mol"], self.nodes_dict[i]["nid"] + ) + self.nodes_dict[i]["is_leaf"] = self.graph.out_degrees(i) == 1 def treesize(self): return 
self.graph.number_of_nodes() @@ -65,49 +79,65 @@ def _recover_node(self, i, original_mol): node = self.nodes_dict[i] clique = [] - clique.extend(node['clique']) - if not node['is_leaf']: - for cidx in node['clique']: - original_mol.GetAtomWithIdx(cidx).SetAtomMapNum(node['nid']) + clique.extend(node["clique"]) + if not node["is_leaf"]: + for cidx in node["clique"]: + original_mol.GetAtomWithIdx(cidx).SetAtomMapNum(node["nid"]) for j in self.graph.successors(i).numpy(): nei_node = self.nodes_dict[j] - clique.extend(nei_node['clique']) - if nei_node['is_leaf']: # Leaf node, no need to mark + clique.extend(nei_node["clique"]) + if nei_node["is_leaf"]: # Leaf node, no need to mark continue - for cidx in nei_node['clique']: + for cidx in nei_node["clique"]: # allow singleton node override the atom mapping - if cidx not in node['clique'] or len(nei_node['clique']) == 1: + if cidx not in node["clique"] or len(nei_node["clique"]) == 1: atom = original_mol.GetAtomWithIdx(cidx) - atom.SetAtomMapNum(nei_node['nid']) + atom.SetAtomMapNum(nei_node["nid"]) clique = list(set(clique)) label_mol = get_clique_mol(original_mol, clique) - node['label'] = Chem.MolToSmiles(Chem.MolFromSmiles(get_smiles(label_mol))) - node['label_mol'] = get_mol(node['label']) + node["label"] = Chem.MolToSmiles( + Chem.MolFromSmiles(get_smiles(label_mol)) + ) + node["label_mol"] = get_mol(node["label"]) for cidx in clique: original_mol.GetAtomWithIdx(cidx).SetAtomMapNum(0) - return node['label'] + return node["label"] def _assemble_node(self, i): - neighbors = [self.nodes_dict[j] for j in self.graph.successors(i).numpy() - if self.nodes_dict[j]['mol'].GetNumAtoms() > 1] - neighbors = sorted(neighbors, key=lambda x: x['mol'].GetNumAtoms(), reverse=True) - singletons = [self.nodes_dict[j] for j in self.graph.successors(i).numpy() - if self.nodes_dict[j]['mol'].GetNumAtoms() == 1] + neighbors = [ + self.nodes_dict[j] + for j in self.graph.successors(i).numpy() + if self.nodes_dict[j]["mol"].GetNumAtoms() > 1 + ] + neighbors = sorted( + neighbors, key=lambda x: x["mol"].GetNumAtoms(), reverse=True + ) + singletons = [ + self.nodes_dict[j] + for j in self.graph.successors(i).numpy() + if self.nodes_dict[j]["mol"].GetNumAtoms() == 1 + ] neighbors = singletons + neighbors cands = enum_assemble_nx(self.nodes_dict[i], neighbors) if len(cands) > 0: - self.nodes_dict[i]['cands'], self.nodes_dict[i]['cand_mols'], _ = list(zip(*cands)) - self.nodes_dict[i]['cands'] = list(self.nodes_dict[i]['cands']) - self.nodes_dict[i]['cand_mols'] = list(self.nodes_dict[i]['cand_mols']) + ( + self.nodes_dict[i]["cands"], + self.nodes_dict[i]["cand_mols"], + _, + ) = list(zip(*cands)) + self.nodes_dict[i]["cands"] = list(self.nodes_dict[i]["cands"]) + self.nodes_dict[i]["cand_mols"] = list( + self.nodes_dict[i]["cand_mols"] + ) else: - self.nodes_dict[i]['cands'] = [] - self.nodes_dict[i]['cand_mols'] = [] + self.nodes_dict[i]["cands"] = [] + self.nodes_dict[i]["cand_mols"] = [] def recover(self): for i in self.nodes_dict: diff --git a/examples/pytorch/jtnn/jtnn/mpn.py b/examples/pytorch/jtnn/jtnn/mpn.py index d1055e8c7c32..6c0dc1000158 100644 --- a/examples/pytorch/jtnn/jtnn/mpn.py +++ b/examples/pytorch/jtnn/jtnn/mpn.py @@ -1,37 +1,74 @@ +import rdkit.Chem as Chem import torch import torch.nn as nn -import rdkit.Chem as Chem import torch.nn.functional as F -from .chemutils import get_mol + import dgl -from dgl import mean_nodes, line_graph import dgl.function as DGLF +from dgl import line_graph, mean_nodes + +from .chemutils import get_mol -ELEM_LIST = 
['C', 'N', 'O', 'S', 'F', 'Si', 'P', 'Cl', 'Br', 'Mg', 'Na', 'Ca', - 'Fe', 'Al', 'I', 'B', 'K', 'Se', 'Zn', 'H', 'Cu', 'Mn', 'unknown'] +ELEM_LIST = [ + "C", + "N", + "O", + "S", + "F", + "Si", + "P", + "Cl", + "Br", + "Mg", + "Na", + "Ca", + "Fe", + "Al", + "I", + "B", + "K", + "Se", + "Zn", + "H", + "Cu", + "Mn", + "unknown", +] ATOM_FDIM = len(ELEM_LIST) + 6 + 5 + 4 + 1 BOND_FDIM = 5 + 6 MAX_NB = 6 + def onek_encoding_unk(x, allowable_set): if x not in allowable_set: x = allowable_set[-1] return [x == s for s in allowable_set] + def atom_features(atom): - return (torch.Tensor(onek_encoding_unk(atom.GetSymbol(), ELEM_LIST) - + onek_encoding_unk(atom.GetDegree(), [0,1,2,3,4,5]) - + onek_encoding_unk(atom.GetFormalCharge(), [-1,-2,1,2,0]) - + onek_encoding_unk(int(atom.GetChiralTag()), [0,1,2,3]) - + [atom.GetIsAromatic()])) + return torch.Tensor( + onek_encoding_unk(atom.GetSymbol(), ELEM_LIST) + + onek_encoding_unk(atom.GetDegree(), [0, 1, 2, 3, 4, 5]) + + onek_encoding_unk(atom.GetFormalCharge(), [-1, -2, 1, 2, 0]) + + onek_encoding_unk(int(atom.GetChiralTag()), [0, 1, 2, 3]) + + [atom.GetIsAromatic()] + ) + def bond_features(bond): bt = bond.GetBondType() stereo = int(bond.GetStereo()) - fbond = [bt == Chem.rdchem.BondType.SINGLE, bt == Chem.rdchem.BondType.DOUBLE, bt == Chem.rdchem.BondType.TRIPLE, bt == Chem.rdchem.BondType.AROMATIC, bond.IsInRing()] - fstereo = onek_encoding_unk(stereo, [0,1,2,3,4,5]) - return (torch.Tensor(fbond + fstereo)) + fbond = [ + bt == Chem.rdchem.BondType.SINGLE, + bt == Chem.rdchem.BondType.DOUBLE, + bt == Chem.rdchem.BondType.TRIPLE, + bt == Chem.rdchem.BondType.AROMATIC, + bond.IsInRing(), + ] + fstereo = onek_encoding_unk(stereo, [0, 1, 2, 3, 4, 5]) + return torch.Tensor(fbond + fstereo) + def mol2dgl_single(smiles): n_edges = 0 @@ -61,8 +98,11 @@ def mol2dgl_single(smiles): bond_x.append(features) graph = dgl.graph((bond_src, bond_dst), num_nodes=n_atoms) n_edges += n_bonds - return graph, torch.stack(atom_x), \ - torch.stack(bond_x) if len(bond_x) > 0 else torch.zeros(0) + return ( + graph, + torch.stack(atom_x), + torch.stack(bond_x) if len(bond_x) > 0 else torch.zeros(0), + ) class LoopyBPUpdate(nn.Module): @@ -73,10 +113,10 @@ def __init__(self, hidden_size): self.W_h = nn.Linear(hidden_size, hidden_size, bias=False) def forward(self, nodes): - msg_input = nodes.data['msg_input'] - msg_delta = self.W_h(nodes.data['accum_msg']) + msg_input = nodes.data["msg_input"] + msg_delta = self.W_h(nodes.data["accum_msg"]) msg = F.relu(msg_input + msg_delta) - return {'msg': msg} + return {"msg": msg} class GatherUpdate(nn.Module): @@ -87,9 +127,9 @@ def __init__(self, hidden_size): self.W_o = nn.Linear(ATOM_FDIM + hidden_size, hidden_size) def forward(self, nodes): - m = nodes.data['m'] + m = nodes.data["m"] return { - 'h': F.relu(self.W_o(torch.cat([nodes.data['x'], m], 1))), + "h": F.relu(self.W_o(torch.cat([nodes.data["x"], m], 1))), } @@ -121,7 +161,7 @@ def forward(self, mol_graph): mol_graph = self.run(mol_graph, mol_line_graph) # TODO: replace with unbatch or readout - g_repr = mean_nodes(mol_graph, 'h') + g_repr = mean_nodes(mol_graph, "h") self.n_samples_total += n_samples self.n_nodes_total += n_nodes @@ -134,32 +174,38 @@ def run(self, mol_graph, mol_line_graph): n_nodes = mol_graph.number_of_nodes() mol_graph.apply_edges( - func=lambda edges: {'src_x': edges.src['x']}, + func=lambda edges: {"src_x": edges.src["x"]}, ) mol_line_graph.ndata.update(mol_graph.edata) e_repr = mol_line_graph.ndata - bond_features = e_repr['x'] - source_features = 
e_repr['src_x'] + bond_features = e_repr["x"] + source_features = e_repr["src_x"] features = torch.cat([source_features, bond_features], 1) msg_input = self.W_i(features) - mol_line_graph.ndata.update({ - 'msg_input': msg_input, - 'msg': F.relu(msg_input), - 'accum_msg': torch.zeros_like(msg_input), - }) - mol_graph.ndata.update({ - 'm': bond_features.new(n_nodes, self.hidden_size).zero_(), - 'h': bond_features.new(n_nodes, self.hidden_size).zero_(), - }) + mol_line_graph.ndata.update( + { + "msg_input": msg_input, + "msg": F.relu(msg_input), + "accum_msg": torch.zeros_like(msg_input), + } + ) + mol_graph.ndata.update( + { + "m": bond_features.new(n_nodes, self.hidden_size).zero_(), + "h": bond_features.new(n_nodes, self.hidden_size).zero_(), + } + ) for i in range(self.depth - 1): - mol_line_graph.update_all(DGLF.copy_u('msg', 'msg'), DGLF.sum('msg', 'accum_msg')) + mol_line_graph.update_all( + DGLF.copy_u("msg", "msg"), DGLF.sum("msg", "accum_msg") + ) mol_line_graph.apply_nodes(self.loopy_bp_updater) mol_graph.edata.update(mol_line_graph.ndata) - mol_graph.update_all(DGLF.copy_e('msg', 'msg'), DGLF.sum('msg', 'm')) + mol_graph.update_all(DGLF.copy_e("msg", "msg"), DGLF.sum("msg", "m")) mol_graph.apply_nodes(self.gather_updater) return mol_graph diff --git a/examples/pytorch/jtnn/jtnn/nnutils.py b/examples/pytorch/jtnn/jtnn/nnutils.py index 8ef01ee25c4e..ffd76b4965ac 100644 --- a/examples/pytorch/jtnn/jtnn/nnutils.py +++ b/examples/pytorch/jtnn/jtnn/nnutils.py @@ -1,12 +1,14 @@ +import os + import torch import torch.nn as nn -import os + import dgl def cuda(x): - if torch.cuda.is_available() and not os.getenv('NOCUDA', None): - return x.to(torch.device('cuda')) # works for both DGLGraph and tensor + if torch.cuda.is_available() and not os.getenv("NOCUDA", None): + return x.to(torch.device("cuda")) # works for both DGLGraph and tensor else: return x @@ -22,27 +24,28 @@ def __init__(self, hidden_size): self.W_h = nn.Linear(2 * hidden_size, hidden_size) def update_zm(self, node): - src_x = node.data['src_x'] - s = node.data['s'] - rm = node.data['accum_rm'] + src_x = node.data["src_x"] + s = node.data["s"] + rm = node.data["accum_rm"] z = torch.sigmoid(self.W_z(torch.cat([src_x, s], 1))) m = torch.tanh(self.W_h(torch.cat([src_x, rm], 1))) m = (1 - z) * s + z * m - return {'m': m, 'z': z} + return {"m": m, "z": z} def update_r(self, node, zm=None): - dst_x = node.data['dst_x'] - m = node.data['m'] if zm is None else zm['m'] + dst_x = node.data["dst_x"] + m = node.data["m"] if zm is None else zm["m"] r_1 = self.W_r(dst_x) r_2 = self.U_r(m) r = torch.sigmoid(r_1 + r_2) - return {'r': r, 'rm': r * m} + return {"r": r, "rm": r * m} def forward(self, node): dic = self.update_zm(node) dic.update(self.update_r(node, zm=dic)) return dic + def tocpu(g): src, dst = g.edges() src = src.cpu() diff --git a/examples/pytorch/jtnn/vaetrain_dgl.py b/examples/pytorch/jtnn/vaetrain_dgl.py index 66927ab1ec6b..2deee95effbb 100755 --- a/examples/pytorch/jtnn/vaetrain_dgl.py +++ b/examples/pytorch/jtnn/vaetrain_dgl.py @@ -1,27 +1,35 @@ +import math +import random +import sys +from collections import deque +from optparse import OptionParser + +import rdkit import torch import torch.nn as nn import torch.optim as optim import torch.optim.lr_scheduler as lr_scheduler -from torch.utils.data import DataLoader - -import math, random, sys -from optparse import OptionParser -from collections import deque -import rdkit import tqdm - from jtnn import * +from torch.utils.data import DataLoader + 
+torch.multiprocessing.set_sharing_strategy("file_system") -torch.multiprocessing.set_sharing_strategy('file_system') def worker_init_fn(id_): - lg = rdkit.RDLogger.logger() + lg = rdkit.RDLogger.logger() lg.setLevel(rdkit.RDLogger.CRITICAL) + + worker_init_fn(None) parser = OptionParser() -parser.add_option("-t", "--train", dest="train", default='train', help='Training file name') -parser.add_option("-v", "--vocab", dest="vocab", default='vocab', help='Vocab file name') +parser.add_option( + "-t", "--train", dest="train", default="train", help="Training file name" +) +parser.add_option( + "-v", "--vocab", dest="vocab", default="vocab", help="Vocab file name" +) parser.add_option("-s", "--save_dir", dest="save_path") parser.add_option("-m", "--model", dest="model_path", default=None) parser.add_option("-b", "--batch", dest="batch_size", default=40) @@ -31,7 +39,7 @@ def worker_init_fn(id_): parser.add_option("-z", "--beta", dest="beta", default=1.0) parser.add_option("-q", "--lr", dest="lr", default=1e-3) parser.add_option("-T", "--test", dest="test", action="store_true") -opts,args = parser.parse_args() +opts, args = parser.parse_args() dataset = JTNNDataset(data=opts.train, vocab=opts.vocab, training=True) vocab = dataset.vocab @@ -55,7 +63,10 @@ def worker_init_fn(id_): nn.init.xavier_normal(param) model = cuda(model) -print("Model #Params: %dK" % (sum([x.nelement() for x in model.parameters()]) / 1000,)) +print( + "Model #Params: %dK" + % (sum([x.nelement() for x in model.parameters()]) / 1000,) +) optimizer = optim.Adam(model.parameters(), lr=lr) scheduler = lr_scheduler.ExponentialLR(optimizer, 0.9) @@ -64,26 +75,28 @@ def worker_init_fn(id_): MAX_EPOCH = 100 PRINT_ITER = 20 + def train(): dataset.training = True dataloader = DataLoader( - dataset, - batch_size=batch_size, - shuffle=True, - num_workers=4, - collate_fn=JTNNCollator(vocab, True), - drop_last=True, - worker_init_fn=worker_init_fn) + dataset, + batch_size=batch_size, + shuffle=True, + num_workers=4, + collate_fn=JTNNCollator(vocab, True), + drop_last=True, + worker_init_fn=worker_init_fn, + ) for epoch in range(MAX_EPOCH): - word_acc,topo_acc,assm_acc,steo_acc = 0,0,0,0 + word_acc, topo_acc, assm_acc, steo_acc = 0, 0, 0, 0 for it, batch in enumerate(tqdm.tqdm(dataloader)): model.zero_grad() try: loss, kl_div, wacc, tacc, sacc, dacc = model(batch, beta) except: - print([t.smiles for t in batch['mol_trees']]) + print([t.smiles for t in batch["mol_trees"]]) raise loss.backward() optimizer.step() @@ -99,36 +112,51 @@ def train(): assm_acc = assm_acc / PRINT_ITER * 100 steo_acc = steo_acc / PRINT_ITER * 100 - print("KL: %.1f, Word: %.2f, Topo: %.2f, Assm: %.2f, Steo: %.2f, Loss: %.6f" % ( - kl_div, word_acc, topo_acc, assm_acc, steo_acc, loss.item())) - word_acc,topo_acc,assm_acc,steo_acc = 0,0,0,0 + print( + "KL: %.1f, Word: %.2f, Topo: %.2f, Assm: %.2f, Steo: %.2f, Loss: %.6f" + % ( + kl_div, + word_acc, + topo_acc, + assm_acc, + steo_acc, + loss.item(), + ) + ) + word_acc, topo_acc, assm_acc, steo_acc = 0, 0, 0, 0 sys.stdout.flush() - if (it + 1) % 1500 == 0: #Fast annealing + if (it + 1) % 1500 == 0: # Fast annealing scheduler.step() print("learning rate: %.6f" % scheduler.get_lr()[0]) - torch.save(model.state_dict(), - opts.save_path + "/model.iter-%d-%d" % (epoch, it + 1)) + torch.save( + model.state_dict(), + opts.save_path + "/model.iter-%d-%d" % (epoch, it + 1), + ) scheduler.step() print("learning rate: %.6f" % scheduler.get_lr()[0]) - torch.save(model.state_dict(), opts.save_path + "/model.iter-" + str(epoch)) + 
torch.save( + model.state_dict(), opts.save_path + "/model.iter-" + str(epoch) + ) + def test(): dataset.training = False dataloader = DataLoader( - dataset, - batch_size=1, - shuffle=False, - num_workers=0, - collate_fn=JTNNCollator(vocab, False), - drop_last=True, - worker_init_fn=worker_init_fn) + dataset, + batch_size=1, + shuffle=False, + num_workers=0, + collate_fn=JTNNCollator(vocab, False), + drop_last=True, + worker_init_fn=worker_init_fn, + ) # Just an example of molecule decoding; in reality you may want to sample # tree and molecule vectors. for it, batch in enumerate(dataloader): - gt_smiles = batch['mol_trees'][0].smiles + gt_smiles = batch["mol_trees"][0].smiles print(gt_smiles) model.move_to_cuda(batch) _, tree_vec, mol_vec = model.encode(batch) @@ -136,21 +164,28 @@ def test(): smiles = model.decode(tree_vec, mol_vec) print(smiles) -if __name__ == '__main__': + +if __name__ == "__main__": if opts.test: test() else: train() - print('# passes:', model.n_passes) - print('Total # nodes processed:', model.n_nodes_total) - print('Total # edges processed:', model.n_edges_total) - print('Total # tree nodes processed:', model.n_tree_nodes_total) - print('Graph decoder: # passes:', model.jtmpn.n_passes) - print('Graph decoder: Total # candidates processed:', model.jtmpn.n_samples_total) - print('Graph decoder: Total # nodes processed:', model.jtmpn.n_nodes_total) - print('Graph decoder: Total # edges processed:', model.jtmpn.n_edges_total) - print('Graph encoder: # passes:', model.mpn.n_passes) - print('Graph encoder: Total # candidates processed:', model.mpn.n_samples_total) - print('Graph encoder: Total # nodes processed:', model.mpn.n_nodes_total) - print('Graph encoder: Total # edges processed:', model.mpn.n_edges_total) + print("# passes:", model.n_passes) + print("Total # nodes processed:", model.n_nodes_total) + print("Total # edges processed:", model.n_edges_total) + print("Total # tree nodes processed:", model.n_tree_nodes_total) + print("Graph decoder: # passes:", model.jtmpn.n_passes) + print( + "Graph decoder: Total # candidates processed:", + model.jtmpn.n_samples_total, + ) + print("Graph decoder: Total # nodes processed:", model.jtmpn.n_nodes_total) + print("Graph decoder: Total # edges processed:", model.jtmpn.n_edges_total) + print("Graph encoder: # passes:", model.mpn.n_passes) + print( + "Graph encoder: Total # candidates processed:", + model.mpn.n_samples_total, + ) + print("Graph encoder: Total # nodes processed:", model.mpn.n_nodes_total) + print("Graph encoder: Total # edges processed:", model.mpn.n_edges_total) diff --git a/examples/pytorch/label_propagation/main.py b/examples/pytorch/label_propagation/main.py index ad478b36f0c1..94bdd4b28325 100644 --- a/examples/pytorch/label_propagation/main.py +++ b/examples/pytorch/label_propagation/main.py @@ -1,55 +1,63 @@ import argparse + import torch + import dgl -from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset +from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset from dgl.nn import LabelPropagation def main(): # check cuda - device = f'cuda:{args.gpu}' if torch.cuda.is_available() and args.gpu >= 0 else 'cpu' + device = ( + f"cuda:{args.gpu}" + if torch.cuda.is_available() and args.gpu >= 0 + else "cpu" + ) # load data - if args.dataset == 'Cora': + if args.dataset == "Cora": dataset = CoraGraphDataset() - elif args.dataset == 'Citeseer': + elif args.dataset == "Citeseer": dataset = CiteseerGraphDataset() - elif args.dataset == 'Pubmed': + elif args.dataset 
== "Pubmed": dataset = PubmedGraphDataset() else: - raise ValueError('Dataset {} is invalid.'.format(args.dataset)) - + raise ValueError("Dataset {} is invalid.".format(args.dataset)) + g = dataset[0] g = dgl.add_self_loop(g) - labels = g.ndata.pop('label').to(device).long() + labels = g.ndata.pop("label").to(device).long() # load masks for train / test, valid is not used. - train_mask = g.ndata.pop('train_mask') - test_mask = g.ndata.pop('test_mask') + train_mask = g.ndata.pop("train_mask") + test_mask = g.ndata.pop("test_mask") train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze().to(device) test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze().to(device) g = g.to(device) - + # label propagation lp = LabelPropagation(args.num_layers, args.alpha) logits = lp(g, labels, mask=train_idx) - test_acc = torch.sum(logits[test_idx].argmax(dim=1) == labels[test_idx]).item() / len(test_idx) + test_acc = torch.sum( + logits[test_idx].argmax(dim=1) == labels[test_idx] + ).item() / len(test_idx) print("Test Acc {:.4f}".format(test_acc)) -if __name__ == '__main__': +if __name__ == "__main__": """ Label Propagation Hyperparameters """ - parser = argparse.ArgumentParser(description='LP') - parser.add_argument('--gpu', type=int, default=0) - parser.add_argument('--dataset', type=str, default='Cora') - parser.add_argument('--num-layers', type=int, default=10) - parser.add_argument('--alpha', type=float, default=0.5) + parser = argparse.ArgumentParser(description="LP") + parser.add_argument("--gpu", type=int, default=0) + parser.add_argument("--dataset", type=str, default="Cora") + parser.add_argument("--num-layers", type=int, default=10) + parser.add_argument("--alpha", type=float, default=0.5) args = parser.parse_args() print(args) diff --git a/examples/pytorch/lda/example_20newsgroups.py b/examples/pytorch/lda/example_20newsgroups.py index 8d6983283ed9..b1aec79e0fce 100644 --- a/examples/pytorch/lda/example_20newsgroups.py +++ b/examples/pytorch/lda/example_20newsgroups.py @@ -17,49 +17,49 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from time import time -import matplotlib.pyplot as plt import warnings +from time import time +import matplotlib.pyplot as plt import numpy as np import scipy.sparse as ss import torch -import dgl -from dgl import function as fn - -from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer -from sklearn.decomposition import NMF, LatentDirichletAllocation +from lda_model import LatentDirichletAllocation as LDAModel from sklearn.datasets import fetch_20newsgroups +from sklearn.decomposition import NMF, LatentDirichletAllocation +from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer -from lda_model import LatentDirichletAllocation as LDAModel +import dgl +from dgl import function as fn n_samples = 2000 n_features = 1000 n_components = 10 n_top_words = 20 -device = 'cuda' +device = "cuda" + def plot_top_words(model, feature_names, n_top_words, title): fig, axes = plt.subplots(2, 5, figsize=(30, 15), sharex=True) axes = axes.flatten() for topic_idx, topic in enumerate(model.components_): - top_features_ind = topic.argsort()[:-n_top_words - 1:-1] + top_features_ind = topic.argsort()[: -n_top_words - 1 : -1] top_features = [feature_names[i] for i in top_features_ind] weights = topic[top_features_ind] ax = axes[topic_idx] ax.barh(top_features, weights, height=0.7) - ax.set_title(f'Topic {topic_idx +1}', - fontdict={'fontsize': 30}) + ax.set_title(f"Topic {topic_idx +1}", fontdict={"fontsize": 30}) ax.invert_yaxis() - ax.tick_params(axis='both', which='major', labelsize=20) - for i in 'top right left'.split(): + ax.tick_params(axis="both", which="major", labelsize=20) + for i in "top right left".split(): ax.spines[i].set_visible(False) fig.suptitle(title, fontsize=40) plt.subplots_adjust(top=0.90, bottom=0.05, wspace=0.90, hspace=0.3) plt.show() + # Load the 20 newsgroups dataset and vectorize it. We use a few heuristics # to filter out useless terms early on: the posts are stripped of headers, # footers and quoted replies, and common English words, words occurring in @@ -67,43 +67,50 @@ def plot_top_words(model, feature_names, n_top_words, title): print("Loading dataset...") t0 = time() -data, _ = fetch_20newsgroups(shuffle=True, random_state=1, - remove=('headers', 'footers', 'quotes'), - return_X_y=True) +data, _ = fetch_20newsgroups( + shuffle=True, + random_state=1, + remove=("headers", "footers", "quotes"), + return_X_y=True, +) data_samples = data[:n_samples] -data_test = data[n_samples:2*n_samples] +data_test = data[n_samples : 2 * n_samples] print("done in %0.3fs." % (time() - t0)) # Use tf (raw term count) features for LDA. print("Extracting tf features for LDA...") -tf_vectorizer = CountVectorizer(max_df=0.95, min_df=2, - max_features=n_features, - stop_words='english') +tf_vectorizer = CountVectorizer( + max_df=0.95, min_df=2, max_features=n_features, stop_words="english" +) t0 = time() tf_vectorizer.fit(data) tf = tf_vectorizer.transform(data_samples) tt = tf_vectorizer.transform(data_test) tf_feature_names = tf_vectorizer.get_feature_names() -tf_uv = [(u,v) - for u,v,e in zip(tf.tocoo().row, tf.tocoo().col, tf.tocoo().data) - for _ in range(e)] -tt_uv = [(u,v) - for u,v,e in zip(tt.tocoo().row, tt.tocoo().col, tt.tocoo().data) - for _ in range(e)] +tf_uv = [ + (u, v) + for u, v, e in zip(tf.tocoo().row, tf.tocoo().col, tf.tocoo().data) + for _ in range(e) +] +tt_uv = [ + (u, v) + for u, v, e in zip(tt.tocoo().row, tt.tocoo().col, tt.tocoo().data) + for _ in range(e) +] print("done in %0.3fs." 
% (time() - t0)) print() print("Preparing dgl graphs...") t0 = time() -G = dgl.heterograph({('doc','topic','word'): tf_uv}, device=device) -Gt = dgl.heterograph({('doc','topic','word'): tt_uv}, device=device) +G = dgl.heterograph({("doc", "topic", "word"): tf_uv}, device=device) +Gt = dgl.heterograph({("doc", "topic", "word"): tt_uv}, device=device) print("done in %0.3fs." % (time() - t0)) print() print("Training dgl-lda model...") t0 = time() -model = LDAModel(G.num_nodes('word'), n_components) +model = LDAModel(G.num_nodes("word"), n_components) model.fit(G) print("done in %0.3fs." % (time() - t0)) print() @@ -113,20 +120,27 @@ def plot_top_words(model, feature_names, n_top_words, title): word_nphi = np.vstack([nphi.tolist() for nphi in model.word_data.nphi]) plot_top_words( - type('dummy', (object,), {'components_': word_nphi}), - tf_feature_names, n_top_words, 'Topics in LDA model') + type("dummy", (object,), {"components_": word_nphi}), + tf_feature_names, + n_top_words, + "Topics in LDA model", +) print("Training scikit-learn model...") -print('\n' * 2, "Fitting LDA models with tf features, " - "n_samples=%d and n_features=%d..." - % (n_samples, n_features)) -lda = LatentDirichletAllocation(n_components=n_components, max_iter=5, - learning_method='online', - learning_offset=50., - random_state=0, - verbose=1, - ) +print( + "\n" * 2, + "Fitting LDA models with tf features, " + "n_samples=%d and n_features=%d..." % (n_samples, n_features), +) +lda = LatentDirichletAllocation( + n_components=n_components, + max_iter=5, + learning_method="online", + learning_offset=50.0, + random_state=0, + verbose=1, +) t0 = time() lda.fit(tf) print("done in %0.3fs." % (time() - t0)) diff --git a/examples/pytorch/lda/lda_model.py b/examples/pytorch/lda/lda_model.py index 69f7fa233ca4..a92f27ac291d 100644 --- a/examples/pytorch/lda/lda_model.py +++ b/examples/pytorch/lda/lda_model.py @@ -17,8 +17,17 @@ # limitations under the License. 
-import os, functools, warnings, torch, collections, dgl, io -import numpy as np, scipy as sp +import collections +import functools +import io +import os +import warnings + +import numpy as np +import scipy as sp +import torch + +import dgl try: from functools import cached_property @@ -37,17 +46,21 @@ def __init__(self, src_data, dst_data): @property def loglike(self): - return (self.src_data['Elog'] + self.dst_data['Elog']).logsumexp(1) + return (self.src_data["Elog"] + self.dst_data["Elog"]).logsumexp(1) @property def phi(self): return ( - self.src_data['Elog'] + self.dst_data['Elog'] - self.loglike.unsqueeze(1) + self.src_data["Elog"] + + self.dst_data["Elog"] + - self.loglike.unsqueeze(1) ).exp() @property def expectation(self): - return (self.src_data['expectation'] * self.dst_data['expectation']).sum(1) + return ( + self.src_data["expectation"] * self.dst_data["expectation"] + ).sum(1) class _Dirichlet: @@ -55,10 +68,13 @@ def __init__(self, prior, nphi, _chunksize=int(1e6)): self.prior = prior self.nphi = nphi self.device = nphi.device - self._sum_by_parts = lambda map_fn: functools.reduce(torch.add, [ - map_fn(slice(i, min(i+_chunksize, nphi.shape[1]))).sum(1) - for i in list(range(0, nphi.shape[1], _chunksize)) - ]) + self._sum_by_parts = lambda map_fn: functools.reduce( + torch.add, + [ + map_fn(slice(i, min(i + _chunksize, nphi.shape[1]))).sum(1) + for i in list(range(0, nphi.shape[1], _chunksize)) + ], + ) def _posterior(self, _ID=slice(None)): return self.prior + self.nphi[:, _ID] @@ -68,14 +84,15 @@ def posterior_sum(self): return self.nphi.sum(1) + self.prior * self.nphi.shape[1] def _Elog(self, _ID=slice(None)): - return torch.digamma(self._posterior(_ID)) - \ - torch.digamma(self.posterior_sum.unsqueeze(1)) + return torch.digamma(self._posterior(_ID)) - torch.digamma( + self.posterior_sum.unsqueeze(1) + ) @cached_property def loglike(self): neg_evid = -self._sum_by_parts( lambda s: (self.nphi[:, s] * self._Elog(s)) - ) + ) prior = torch.as_tensor(self.prior).to(self.nphi) K = self.nphi.shape[1] @@ -83,7 +100,7 @@ def loglike(self): log_B_posterior = self._sum_by_parts( lambda s: torch.lgamma(self._posterior(s)) - ) - torch.lgamma(self.posterior_sum) + ) - torch.lgamma(self.posterior_sum) return neg_evid - log_B_prior + log_B_posterior @@ -105,9 +122,15 @@ def _expectation(self, _ID=slice(None)): @cached_property def Bayesian_gap(self): - return 1. 
- self._sum_by_parts(lambda s: self._Elog(s).exp()) + return 1.0 - self._sum_by_parts(lambda s: self._Elog(s).exp()) - _cached_properties = ["posterior_sum", "loglike", "n", "cdf", "Bayesian_gap"] + _cached_properties = [ + "posterior_sum", + "loglike", + "n", + "cdf", + "Bayesian_gap", + ] def clear_cache(self): for name in self._cached_properties: @@ -117,27 +140,29 @@ def clear_cache(self): pass def update(self, new, _ID=slice(None), rho=1): - """ inplace: old * (1-rho) + new * rho """ + """inplace: old * (1-rho) + new * rho""" self.clear_cache() mean_change = (self.nphi[:, _ID] - new).abs().mean().tolist() - self.nphi *= (1 - rho) + self.nphi *= 1 - rho self.nphi[:, _ID] += new * rho return mean_change class DocData(_Dirichlet): - """ nphi (n_docs by n_topics) """ + """nphi (n_docs by n_topics)""" + def prepare_graph(self, G, key="Elog"): - G.nodes['doc'].data[key] = getattr(self, '_'+key)().to(G.device) + G.nodes["doc"].data[key] = getattr(self, "_" + key)().to(G.device) def update_from(self, G, mult): - new = G.nodes['doc'].data['nphi'] * mult + new = G.nodes["doc"].data["nphi"] * mult return self.update(new.to(self.device)) class _Distributed(collections.UserList): - """ split on dim=0 and store on multiple devices """ + """split on dim=0 and store on multiple devices""" + def __init__(self, prior, nphi): self.prior = prior self.nphi = nphi @@ -146,36 +171,38 @@ def __init__(self, prior, nphi): def split_device(self, other, dim=0): split_sections = [x.shape[0] for x in self.nphi] out = torch.split(other, split_sections, dim) - return [y.to(x.device) for x,y in zip(self.nphi, out)] + return [y.to(x.device) for x, y in zip(self.nphi, out)] class WordData(_Distributed): - """ distributed nphi (n_topics by n_words), transpose to/from graph nodes data """ + """distributed nphi (n_topics by n_words), transpose to/from graph nodes data""" + def prepare_graph(self, G, key="Elog"): - if '_ID' in G.nodes['word'].data: - _ID = G.nodes['word'].data['_ID'] + if "_ID" in G.nodes["word"].data: + _ID = G.nodes["word"].data["_ID"] else: _ID = slice(None) - out = [getattr(part, '_'+key)(_ID).to(G.device) for part in self] - G.nodes['word'].data[key] = torch.cat(out).T - + out = [getattr(part, "_" + key)(_ID).to(G.device) for part in self] + G.nodes["word"].data[key] = torch.cat(out).T def update_from(self, G, mult, rho): - nphi = G.nodes['word'].data['nphi'].T * mult + nphi = G.nodes["word"].data["nphi"].T * mult - if '_ID' in G.nodes['word'].data: - _ID = G.nodes['word'].data['_ID'] + if "_ID" in G.nodes["word"].data: + _ID = G.nodes["word"].data["_ID"] else: _ID = slice(None) - mean_change = [x.update(y, _ID, rho) - for x, y in zip(self, self.split_device(nphi))] + mean_change = [ + x.update(y, _ID, rho) for x, y in zip(self, self.split_device(nphi)) + ] return np.mean(mean_change) -class Gamma(collections.namedtuple('Gamma', "concentration, rate")): - """ articulate the difference between torch gamma and numpy gamma """ +class Gamma(collections.namedtuple("Gamma", "concentration, rate")): + """articulate the difference between torch gamma and numpy gamma""" + @property def shape(self): return self.concentration @@ -218,20 +245,23 @@ class LatentDirichletAllocation: (NIPS 2010). 
[2] Reactive LDA Library blogpost by Yingjie Miao for a similar Gibbs model """ + def __init__( - self, n_words, n_components, + self, + n_words, + n_components, prior=None, rho=1, - mult={'doc': 1, 'word': 1}, - init={'doc': (100., 100.), 'word': (100., 100.)}, - device_list=['cpu'], + mult={"doc": 1, "word": 1}, + init={"doc": (100.0, 100.0), "word": (100.0, 100.0)}, + device_list=["cpu"], verbose=True, - ): + ): self.n_words = n_words self.n_components = n_components if prior is None: - prior = {'doc': 1./n_components, 'word': 1./n_components} + prior = {"doc": 1.0 / n_components, "word": 1.0 / n_components} self.prior = prior self.rho = rho @@ -239,117 +269,128 @@ def __init__( self.init = init assert not isinstance(device_list, str), "plz wrap devices in a list" - self.device_list = device_list[:n_components] # avoid edge cases + self.device_list = device_list[:n_components] # avoid edge cases self.verbose = verbose self._init_word_data() - def _init_word_data(self): split_sections = np.diff( - np.linspace(0, self.n_components, len(self.device_list)+1).astype(int) + np.linspace(0, self.n_components, len(self.device_list) + 1).astype( + int + ) ) word_nphi = [ - Gamma(*self.init['word']).sample((s, self.n_words), device) + Gamma(*self.init["word"]).sample((s, self.n_words), device) for s, device in zip(split_sections, self.device_list) ] - self.word_data = WordData(self.prior['word'], word_nphi) - + self.word_data = WordData(self.prior["word"], word_nphi) def _init_doc_data(self, n_docs, device): - doc_nphi = Gamma(*self.init['doc']).sample( - (n_docs, self.n_components), device) - return DocData(self.prior['doc'], doc_nphi) - + doc_nphi = Gamma(*self.init["doc"]).sample( + (n_docs, self.n_components), device + ) + return DocData(self.prior["doc"], doc_nphi) def save(self, f): for w in self.word_data: w.clear_cache() - torch.save({ - 'prior': self.prior, - 'rho': self.rho, - 'mult': self.mult, - 'init': self.init, - 'word_data': [part.nphi for part in self.word_data], - }, f) - + torch.save( + { + "prior": self.prior, + "rho": self.rho, + "mult": self.mult, + "init": self.init, + "word_data": [part.nphi for part in self.word_data], + }, + f, + ) def _prepare_graph(self, G, doc_data, key="Elog"): doc_data.prepare_graph(G, key) self.word_data.prepare_graph(G, key) - def _e_step(self, G, doc_data=None, mean_change_tol=1e-3, max_iters=100): - """_e_step implements doc data sampling until convergence or max_iters - """ + """_e_step implements doc data sampling until convergence or max_iters""" if doc_data is None: - doc_data = self._init_doc_data(G.num_nodes('doc'), G.device) + doc_data = self._init_doc_data(G.num_nodes("doc"), G.device) - G_rev = G.reverse() # word -> doc + G_rev = G.reverse() # word -> doc self.word_data.prepare_graph(G_rev) for i in range(max_iters): doc_data.prepare_graph(G_rev) G_rev.update_all( - lambda edges: {'phi': EdgeData(edges.src, edges.dst).phi}, - dgl.function.sum('phi', 'nphi') + lambda edges: {"phi": EdgeData(edges.src, edges.dst).phi}, + dgl.function.sum("phi", "nphi"), ) - mean_change = doc_data.update_from(G_rev, self.mult['doc']) + mean_change = doc_data.update_from(G_rev, self.mult["doc"]) if mean_change < mean_change_tol: break if self.verbose: - print(f"e-step num_iters={i+1} with mean_change={mean_change:.4f}, " - f"perplexity={self.perplexity(G, doc_data):.4f}") + print( + f"e-step num_iters={i+1} with mean_change={mean_change:.4f}, " + f"perplexity={self.perplexity(G, doc_data):.4f}" + ) return doc_data - transform = _e_step - def predict(self, 
doc_data): pred_scores = [ # d_exp @ w._expectation() - (lambda x: x @ w.nphi + x.sum(1, keepdims=True) * w.prior) - (d_exp / w.posterior_sum.unsqueeze(0)) + (lambda x: x @ w.nphi + x.sum(1, keepdims=True) * w.prior)( + d_exp / w.posterior_sum.unsqueeze(0) + ) for (d_exp, w) in zip( self.word_data.split_device(doc_data._expectation(), dim=1), - self.word_data) + self.word_data, + ) ] x = torch.zeros_like(pred_scores[0], device=doc_data.device) for p in pred_scores: x += p.to(x.device) return x - def sample(self, doc_data, num_samples): - """ draw independent words and return the marginal probabilities, + """draw independent words and return the marginal probabilities, i.e., the expectations in Dirichlet distributions. """ + def fn(cdf): u = torch.rand(cdf.shape[0], num_samples, device=cdf.device) return torch.searchsorted(cdf, u).to(doc_data.device) topic_ids = fn(doc_data.cdf) word_ids = torch.cat([fn(part.cdf) for part in self.word_data]) - ids = torch.gather(word_ids, 0, topic_ids) # pick components by topic_ids + ids = torch.gather( + word_ids, 0, topic_ids + ) # pick components by topic_ids # compute expectation scores on sampled ids - src_ids = torch.arange( - ids.shape[0], dtype=ids.dtype, device=ids.device - ).reshape((-1, 1)).expand(ids.shape) - unique_ids, inverse_ids = torch.unique(ids, sorted=False, return_inverse=True) + src_ids = ( + torch.arange(ids.shape[0], dtype=ids.dtype, device=ids.device) + .reshape((-1, 1)) + .expand(ids.shape) + ) + unique_ids, inverse_ids = torch.unique( + ids, sorted=False, return_inverse=True + ) - G = dgl.heterograph({('doc','','word'): (src_ids.ravel(), inverse_ids.ravel())}) - G.nodes['word'].data['_ID'] = unique_ids + G = dgl.heterograph( + {("doc", "", "word"): (src_ids.ravel(), inverse_ids.ravel())} + ) + G.nodes["word"].data["_ID"] = unique_ids self._prepare_graph(G, doc_data, "expectation") - G.apply_edges(lambda e: {'expectation': EdgeData(e.src, e.dst).expectation}) - expectation = G.edata.pop('expectation').reshape(ids.shape) + G.apply_edges( + lambda e: {"expectation": EdgeData(e.src, e.dst).expectation} + ) + expectation = G.edata.pop("expectation").reshape(ids.shape) return ids, expectation - def _m_step(self, G, doc_data): """_m_step implements word data sampling and stores word_z stats. mean_change is in the sense of full graph with rho=1. @@ -357,26 +398,25 @@ def _m_step(self, G, doc_data): G = G.clone() self._prepare_graph(G, doc_data) G.update_all( - lambda edges: {'phi': EdgeData(edges.src, edges.dst).phi}, - dgl.function.sum('phi', 'nphi') + lambda edges: {"phi": EdgeData(edges.src, edges.dst).phi}, + dgl.function.sum("phi", "nphi"), ) self._last_mean_change = self.word_data.update_from( - G, self.mult['word'], self.rho) + G, self.mult["word"], self.rho + ) if self.verbose: print(f"m-step mean_change={self._last_mean_change:.4f}, ", end="") - Bayesian_gap = np.mean([ - part.Bayesian_gap.mean().tolist() for part in self.word_data - ]) + Bayesian_gap = np.mean( + [part.Bayesian_gap.mean().tolist() for part in self.word_data] + ) print(f"Bayesian_gap={Bayesian_gap:.4f}") - def partial_fit(self, G): doc_data = self._e_step(G) self._m_step(G, doc_data) return self - def fit(self, G, mean_change_tol=1e-3, max_epochs=10): for i in range(max_epochs): if self.verbose: @@ -387,7 +427,6 @@ def fit(self, G, mean_change_tol=1e-3, max_epochs=10): break return self - def perplexity(self, G, doc_data=None): """ppl = exp{-sum[log(p(w1,...,wn|d))] / n} Follows Eq (15) in Hoffman et al., 2010. 
@@ -398,45 +437,50 @@ def perplexity(self, G, doc_data=None): # compute E[log p(docs | theta, beta)] G = G.clone() self._prepare_graph(G, doc_data) - G.apply_edges(lambda edges: {'loglike': EdgeData(edges.src, edges.dst).loglike}) - edge_elbo = (G.edata['loglike'].sum() / G.num_edges()).tolist() + G.apply_edges( + lambda edges: {"loglike": EdgeData(edges.src, edges.dst).loglike} + ) + edge_elbo = (G.edata["loglike"].sum() / G.num_edges()).tolist() if self.verbose: - print(f'neg_elbo phi: {-edge_elbo:.3f}', end=' ') + print(f"neg_elbo phi: {-edge_elbo:.3f}", end=" ") # compute E[log p(theta | alpha) - log q(theta | gamma)] doc_elbo = (doc_data.loglike.sum() / doc_data.n.sum()).tolist() if self.verbose: - print(f'theta: {-doc_elbo:.3f}', end=' ') + print(f"theta: {-doc_elbo:.3f}", end=" ") # compute E[log p(beta | eta) - log q(beta | lambda)] # The denominator n for extrapolation perplexity is undefined. # We use the train set, whereas sklearn uses the test set. - word_elbo = ( - sum([part.loglike.sum().tolist() for part in self.word_data]) - / sum([part.n.sum().tolist() for part in self.word_data]) - ) + word_elbo = sum( + [part.loglike.sum().tolist() for part in self.word_data] + ) / sum([part.n.sum().tolist() for part in self.word_data]) if self.verbose: - print(f'beta: {-word_elbo:.3f}') + print(f"beta: {-word_elbo:.3f}") ppl = np.exp(-edge_elbo - doc_elbo - word_elbo) - if G.num_edges()>0 and np.isnan(ppl): + if G.num_edges() > 0 and np.isnan(ppl): warnings.warn("numerical issue in perplexity") return ppl def doc_subgraph(G, doc_ids): sampler = dgl.dataloading.MultiLayerFullNeighborSampler(1) - _, _, (block,) = sampler.sample(G.reverse(), {'doc': torch.as_tensor(doc_ids)}) + _, _, (block,) = sampler.sample( + G.reverse(), {"doc": torch.as_tensor(doc_ids)} + ) B = dgl.DGLHeteroGraph( - block._graph, ['_', 'word', 'doc', '_'], block.etypes + block._graph, ["_", "word", "doc", "_"], block.etypes ).reverse() - B.nodes['word'].data['_ID'] = block.nodes['word'].data['_ID'] + B.nodes["word"].data["_ID"] = block.nodes["word"].data["_ID"] return B -if __name__ == '__main__': - print('Testing LatentDirichletAllocation ...') - G = dgl.heterograph({('doc', '', 'word'): [(0, 0), (1, 3)]}, {'doc': 2, 'word': 5}) +if __name__ == "__main__": + print("Testing LatentDirichletAllocation ...") + G = dgl.heterograph( + {("doc", "", "word"): [(0, 0), (1, 3)]}, {"doc": 2, "word": 5} + ) model = LatentDirichletAllocation(n_words=5, n_components=10, verbose=False) model.fit(G) model.transform(G) @@ -454,4 +498,4 @@ def doc_subgraph(G, doc_ids): f.seek(0) print(torch.load(f)) - print('Testing LatentDirichletAllocation passed!') + print("Testing LatentDirichletAllocation passed!") diff --git a/examples/pytorch/line_graph/train.py b/examples/pytorch/line_graph/train.py index 10a7a8b5182f..763450745afa 100644 --- a/examples/pytorch/line_graph/train.py +++ b/examples/pytorch/line_graph/train.py @@ -6,11 +6,12 @@ """ from __future__ import division -import time import argparse +import time from itertools import permutations +import gnn import numpy as np import torch as th import torch.nn.functional as F @@ -18,37 +19,51 @@ from torch.utils.data import DataLoader from dgl.data import SBMMixtureDataset -import gnn parser = argparse.ArgumentParser() -parser.add_argument('--batch-size', type=int, help='Batch size', default=1) -parser.add_argument('--gpu', type=int, help='GPU index', default=-1) -parser.add_argument('--lr', type=float, help='Learning rate', default=0.001) -parser.add_argument('--n-communities', type=int, 
help='Number of communities', default=2) -parser.add_argument('--n-epochs', type=int, help='Number of epochs', default=100) -parser.add_argument('--n-features', type=int, help='Number of features', default=16) -parser.add_argument('--n-graphs', type=int, help='Number of graphs', default=10) -parser.add_argument('--n-layers', type=int, help='Number of layers', default=30) -parser.add_argument('--n-nodes', type=int, help='Number of nodes', default=10000) -parser.add_argument('--optim', type=str, help='Optimizer', default='Adam') -parser.add_argument('--radius', type=int, help='Radius', default=3) -parser.add_argument('--verbose', action='store_true') +parser.add_argument("--batch-size", type=int, help="Batch size", default=1) +parser.add_argument("--gpu", type=int, help="GPU index", default=-1) +parser.add_argument("--lr", type=float, help="Learning rate", default=0.001) +parser.add_argument( + "--n-communities", type=int, help="Number of communities", default=2 +) +parser.add_argument( + "--n-epochs", type=int, help="Number of epochs", default=100 +) +parser.add_argument( + "--n-features", type=int, help="Number of features", default=16 +) +parser.add_argument("--n-graphs", type=int, help="Number of graphs", default=10) +parser.add_argument("--n-layers", type=int, help="Number of layers", default=30) +parser.add_argument( + "--n-nodes", type=int, help="Number of nodes", default=10000 +) +parser.add_argument("--optim", type=str, help="Optimizer", default="Adam") +parser.add_argument("--radius", type=int, help="Radius", default=3) +parser.add_argument("--verbose", action="store_true") args = parser.parse_args() -dev = th.device('cpu') if args.gpu < 0 else th.device('cuda:%d' % args.gpu) +dev = th.device("cpu") if args.gpu < 0 else th.device("cuda:%d" % args.gpu) K = args.n_communities training_dataset = SBMMixtureDataset(args.n_graphs, args.n_nodes, K) -training_loader = DataLoader(training_dataset, args.batch_size, - collate_fn=training_dataset.collate_fn, drop_last=True) +training_loader = DataLoader( + training_dataset, + args.batch_size, + collate_fn=training_dataset.collate_fn, + drop_last=True, +) ones = th.ones(args.n_nodes // K) -y_list = [th.cat([x * ones for x in p]).long().to(dev) for p in permutations(range(K))] +y_list = [ + th.cat([x * ones for x in p]).long().to(dev) for p in permutations(range(K)) +] feats = [1] + [args.n_features] * args.n_layers + [K] model = gnn.GNN(feats, args.radius, K).to(dev) optimizer = getattr(optim, args.optim)(model.parameters(), lr=args.lr) + def compute_overlap(z_list): ybar_list = [th.max(z, 1)[1] for z in z_list] overlap_list = [] @@ -58,15 +73,20 @@ def compute_overlap(z_list): overlap_list.append(overlap) return sum(overlap_list) / len(overlap_list) + def from_np(f, *args): def wrap(*args): - new = [th.from_numpy(x) if isinstance(x, np.ndarray) else x for x in args] + new = [ + th.from_numpy(x) if isinstance(x, np.ndarray) else x for x in args + ] return f(*new) + return wrap + @from_np def step(i, j, g, lg, deg_g, deg_lg, pm_pd): - """ One step of training. 
""" + """One step of training.""" g = g.to(dev) lg = lg.to(dev) deg_g = deg_g.to(dev).unsqueeze(1) @@ -77,7 +97,10 @@ def step(i, j, g, lg, deg_g, deg_lg, pm_pd): t_forward = time.time() - t0 z_list = th.chunk(z, args.batch_size, 0) - loss = sum(min(F.cross_entropy(z, y) for y in y_list) for z in z_list) / args.batch_size + loss = ( + sum(min(F.cross_entropy(z, y) for y in y_list) for z in z_list) + / args.batch_size + ) overlap = compute_overlap(z_list) optimizer.zero_grad() @@ -88,6 +111,7 @@ def step(i, j, g, lg, deg_g, deg_lg, pm_pd): return loss, overlap, t_forward, t_backward + @from_np def inference(g, lg, deg_g, deg_lg, pm_pd): g = g.to(dev) @@ -99,9 +123,11 @@ def inference(g, lg, deg_g, deg_lg, pm_pd): z = model(g, lg, deg_g, deg_lg, pm_pd) return z + + def test(): - p_list =[6, 5.5, 5, 4.5, 1.5, 1, 0.5, 0] - q_list =[0, 0.5, 1, 1.5, 4.5, 5, 5.5, 6] + p_list = [6, 5.5, 5, 4.5, 1.5, 1, 0.5, 0] + q_list = [0, 0.5, 1, 1.5, 4.5, 5, 5.5, 6] N = 1 overlap_list = [] for p, q in zip(p_list, q_list): @@ -112,31 +138,38 @@ def test(): overlap_list.append(compute_overlap(th.chunk(z, N, 0))) return overlap_list + n_iterations = args.n_graphs // args.batch_size for i in range(args.n_epochs): total_loss, total_overlap, s_forward, s_backward = 0, 0, 0, 0 for j, [g, lg, deg_g, deg_lg, pm_pd] in enumerate(training_loader): - loss, overlap, t_forward, t_backward = step(i, j, g, lg, deg_g, deg_lg, pm_pd) + loss, overlap, t_forward, t_backward = step( + i, j, g, lg, deg_g, deg_lg, pm_pd + ) total_loss += loss total_overlap += overlap s_forward += t_forward s_backward += t_backward - epoch = '0' * (len(str(args.n_epochs)) - len(str(i))) - iteration = '0' * (len(str(n_iterations)) - len(str(j))) + epoch = "0" * (len(str(args.n_epochs)) - len(str(i))) + iteration = "0" * (len(str(n_iterations)) - len(str(j))) if args.verbose: - print('[epoch %s%d iteration %s%d]loss %.3f | overlap %.3f' - % (epoch, i, iteration, j, loss, overlap)) + print( + "[epoch %s%d iteration %s%d]loss %.3f | overlap %.3f" + % (epoch, i, iteration, j, loss, overlap) + ) - epoch = '0' * (len(str(args.n_epochs)) - len(str(i))) + epoch = "0" * (len(str(args.n_epochs)) - len(str(i))) loss = total_loss / (j + 1) overlap = total_overlap / (j + 1) t_forward = s_forward / (j + 1) t_backward = s_backward / (j + 1) - print('[epoch %s%d]loss %.3f | overlap %.3f | forward time %.3fs | backward time %.3fs' - % (epoch, i, loss, overlap, t_forward, t_backward)) + print( + "[epoch %s%d]loss %.3f | overlap %.3f | forward time %.3fs | backward time %.3fs" + % (epoch, i, loss, overlap, t_forward, t_backward) + ) overlap_list = test() - overlap_str = ' - '.join(['%.3f' % overlap for overlap in overlap_list]) - print('[epoch %s%d]overlap: %s' % (epoch, i, overlap_str)) + overlap_str = " - ".join(["%.3f" % overlap for overlap in overlap_list]) + print("[epoch %s%d]overlap: %s" % (epoch, i, overlap_str)) diff --git a/examples/pytorch/mixhop/main.py b/examples/pytorch/mixhop/main.py index 8a0a7fe1fa97..41b870d74cee 100644 --- a/examples/pytorch/mixhop/main.py +++ b/examples/pytorch/mixhop/main.py @@ -2,16 +2,18 @@ import argparse import copy +import random + +import numpy as np import torch -import torch.optim as optim import torch.nn as nn -import numpy as np -import random +import torch.optim as optim +from tqdm import trange + import dgl import dgl.function as fn +from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset -from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset -from tqdm import trange 
class MixHopConv(nn.Module): r""" @@ -44,13 +46,16 @@ class MixHopConv(nn.Module): batchnorm: bool, optional If True, use batch normalization. Defaults: ``False``. """ - def __init__(self, - in_dim, - out_dim, - p=[0, 1, 2], - dropout=0, - activation=None, - batchnorm=False): + + def __init__( + self, + in_dim, + out_dim, + p=[0, 1, 2], + dropout=0, + activation=None, + batchnorm=False, + ): super(MixHopConv, self).__init__() self.in_dim = in_dim self.out_dim = out_dim @@ -64,11 +69,11 @@ def __init__(self, # define batch norm layer if self.batchnorm: self.bn = nn.BatchNorm1d(out_dim * len(p)) - + # define weight dict for each power j - self.weights = nn.ModuleDict({ - str(j): nn.Linear(in_dim, out_dim, bias=False) for j in p - }) + self.weights = nn.ModuleDict( + {str(j): nn.Linear(in_dim, out_dim, bias=False) for j in p} + ) def forward(self, graph, feats): with graph.local_scope(): @@ -84,34 +89,37 @@ def forward(self, graph, feats): outputs.append(output) feats = feats * norm - graph.ndata['h'] = feats - graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h')) - feats = graph.ndata.pop('h') + graph.ndata["h"] = feats + graph.update_all(fn.copy_u("h", "m"), fn.sum("m", "h")) + feats = graph.ndata.pop("h") feats = feats * norm - + final = torch.cat(outputs, dim=1) - + if self.batchnorm: final = self.bn(final) - + if self.activation is not None: final = self.activation(final) - + final = self.dropout(final) return final + class MixHop(nn.Module): - def __init__(self, - in_dim, - hid_dim, - out_dim, - num_layers=2, - p=[0, 1, 2], - input_dropout=0.0, - layer_dropout=0.0, - activation=None, - batchnorm=False): + def __init__( + self, + in_dim, + hid_dim, + out_dim, + num_layers=2, + p=[0, 1, 2], + input_dropout=0.0, + layer_dropout=0.0, + activation=None, + batchnorm=False, + ): super(MixHop, self).__init__() self.in_dim = in_dim self.hid_dim = hid_dim @@ -127,68 +135,79 @@ def __init__(self, self.dropout = nn.Dropout(self.input_dropout) # Input layer - self.layers.append(MixHopConv(self.in_dim, - self.hid_dim, - p=self.p, - dropout=self.input_dropout, - activation=self.activation, - batchnorm=self.batchnorm)) - + self.layers.append( + MixHopConv( + self.in_dim, + self.hid_dim, + p=self.p, + dropout=self.input_dropout, + activation=self.activation, + batchnorm=self.batchnorm, + ) + ) + # Hidden layers with n - 1 MixHopConv layers for i in range(self.num_layers - 2): - self.layers.append(MixHopConv(self.hid_dim * len(args.p), - self.hid_dim, - p=self.p, - dropout=self.layer_dropout, - activation=self.activation, - batchnorm=self.batchnorm)) - - self.fc_layers = nn.Linear(self.hid_dim * len(args.p), self.out_dim, bias=False) + self.layers.append( + MixHopConv( + self.hid_dim * len(args.p), + self.hid_dim, + p=self.p, + dropout=self.layer_dropout, + activation=self.activation, + batchnorm=self.batchnorm, + ) + ) + + self.fc_layers = nn.Linear( + self.hid_dim * len(args.p), self.out_dim, bias=False + ) def forward(self, graph, feats): feats = self.dropout(feats) for layer in self.layers: feats = layer(graph, feats) - + feats = self.fc_layers(feats) return feats + def main(args): # Step 1: Prepare graph data and retrieve train/validation/test index ============================= # # Load from DGL dataset - if args.dataset == 'Cora': + if args.dataset == "Cora": dataset = CoraGraphDataset() - elif args.dataset == 'Citeseer': + elif args.dataset == "Citeseer": dataset = CiteseerGraphDataset() - elif args.dataset == 'Pubmed': + elif args.dataset == "Pubmed": dataset = PubmedGraphDataset() else: - 
raise ValueError('Dataset {} is invalid.'.format(args.dataset)) - + raise ValueError("Dataset {} is invalid.".format(args.dataset)) + graph = dataset[0] graph = dgl.add_self_loop(graph) # check cuda if args.gpu >= 0 and torch.cuda.is_available(): - device = 'cuda:{}'.format(args.gpu) + device = "cuda:{}".format(args.gpu) else: - device = 'cpu' + device = "cpu" # retrieve the number of classes n_classes = dataset.num_classes # retrieve labels of ground truth - labels = graph.ndata.pop('label').to(device).long() + labels = graph.ndata.pop("label").to(device).long() # Extract node features - feats = graph.ndata.pop('feat').to(device) + feats = graph.ndata.pop("feat").to(device) n_features = feats.shape[-1] # retrieve masks for train/validation/test - train_mask = graph.ndata.pop('train_mask') - val_mask = graph.ndata.pop('val_mask') - test_mask = graph.ndata.pop('test_mask') + train_mask = graph.ndata.pop("train_mask") + val_mask = graph.ndata.pop("val_mask") + test_mask = graph.ndata.pop("test_mask") train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze().to(device) val_idx = torch.nonzero(val_mask, as_tuple=False).squeeze().to(device) @@ -197,16 +216,18 @@ def main(args): graph = graph.to(device) # Step 2: Create model =================================================================== # - model = MixHop(in_dim=n_features, - hid_dim=args.hid_dim, - out_dim=n_classes, - num_layers=args.num_layers, - p=args.p, - input_dropout=args.input_dropout, - layer_dropout=args.layer_dropout, - activation=torch.tanh, - batchnorm=True) - + model = MixHop( + in_dim=n_features, + hid_dim=args.hid_dim, + out_dim=n_classes, + num_layers=args.num_layers, + p=args.p, + input_dropout=args.input_dropout, + layer_dropout=args.layer_dropout, + activation=torch.tanh, + batchnorm=True, + ) + model = model.to(device) best_model = copy.deepcopy(model) @@ -218,7 +239,7 @@ def main(args): # Step 4: training epoches =============================================================== # acc = 0 no_improvement = 0 - epochs = trange(args.epochs, desc='Accuracy & Loss') + epochs = trange(args.epochs, desc="Accuracy & Loss") for _ in epochs: # Training using a full graph @@ -228,7 +249,9 @@ def main(args): # compute loss train_loss = loss_fn(logits[train_idx], labels[train_idx]) - train_acc = torch.sum(logits[train_idx].argmax(dim=1) == labels[train_idx]).item() / len(train_idx) + train_acc = torch.sum( + logits[train_idx].argmax(dim=1) == labels[train_idx] + ).item() / len(train_idx) # backward opt.zero_grad() @@ -240,54 +263,99 @@ def main(args): with torch.no_grad(): valid_loss = loss_fn(logits[val_idx], labels[val_idx]) - valid_acc = torch.sum(logits[val_idx].argmax(dim=1) == labels[val_idx]).item() / len(val_idx) + valid_acc = torch.sum( + logits[val_idx].argmax(dim=1) == labels[val_idx] + ).item() / len(val_idx) # Print out performance - epochs.set_description('Train Acc {:.4f} | Train Loss {:.4f} | Val Acc {:.4f} | Val loss {:.4f}'.format( - train_acc, train_loss.item(), valid_acc, valid_loss.item())) - + epochs.set_description( + "Train Acc {:.4f} | Train Loss {:.4f} | Val Acc {:.4f} | Val loss {:.4f}".format( + train_acc, train_loss.item(), valid_acc, valid_loss.item() + ) + ) + if valid_acc < acc: no_improvement += 1 if no_improvement == args.early_stopping: - print('Early stop.') + print("Early stop.") break else: no_improvement = 0 acc = valid_acc best_model = copy.deepcopy(model) - + scheduler.step() best_model.eval() logits = best_model(graph, feats) - test_acc = torch.sum(logits[test_idx].argmax(dim=1) == 
labels[test_idx]).item() / len(test_idx) + test_acc = torch.sum( + logits[test_idx].argmax(dim=1) == labels[test_idx] + ).item() / len(test_idx) print("Test Acc {:.4f}".format(test_acc)) return test_acc + if __name__ == "__main__": """ MixHop Model Hyperparameters """ - parser = argparse.ArgumentParser(description='MixHop GCN') + parser = argparse.ArgumentParser(description="MixHop GCN") # data source params - parser.add_argument('--dataset', type=str, default='Cora', help='Name of dataset.') + parser.add_argument( + "--dataset", type=str, default="Cora", help="Name of dataset." + ) # cuda params - parser.add_argument('--gpu', type=int, default=-1, help='GPU index. Default: -1, using CPU.') + parser.add_argument( + "--gpu", type=int, default=-1, help="GPU index. Default: -1, using CPU." + ) # training params - parser.add_argument('--epochs', type=int, default=2000, help='Training epochs.') - parser.add_argument('--early-stopping', type=int, default=200, help='Patient epochs to wait before early stopping.') - parser.add_argument('--lr', type=float, default=0.5, help='Learning rate.') - parser.add_argument('--lamb', type=float, default=5e-4, help='L2 reg.') - parser.add_argument('--step-size', type=int, default=40, help='Period of learning rate decay.') - parser.add_argument('--gamma', type=float, default=0.01, help='Multiplicative factor of learning rate decay.') + parser.add_argument( + "--epochs", type=int, default=2000, help="Training epochs." + ) + parser.add_argument( + "--early-stopping", + type=int, + default=200, + help="Patient epochs to wait before early stopping.", + ) + parser.add_argument("--lr", type=float, default=0.5, help="Learning rate.") + parser.add_argument("--lamb", type=float, default=5e-4, help="L2 reg.") + parser.add_argument( + "--step-size", + type=int, + default=40, + help="Period of learning rate decay.", + ) + parser.add_argument( + "--gamma", + type=float, + default=0.01, + help="Multiplicative factor of learning rate decay.", + ) # model params - parser.add_argument("--hid-dim", type=int, default=60, help='Hidden layer dimensionalities.') - parser.add_argument("--num-layers", type=int, default=4, help='Number of GNN layers.') - parser.add_argument("--input-dropout", type=float, default=0.7, help='Dropout applied at input layer.') - parser.add_argument("--layer-dropout", type=float, default=0.9, help='Dropout applied at hidden layers.') - parser.add_argument('--p', nargs='+', type=int, help='List of powers of adjacency matrix.') + parser.add_argument( + "--hid-dim", type=int, default=60, help="Hidden layer dimensionalities." + ) + parser.add_argument( + "--num-layers", type=int, default=4, help="Number of GNN layers." + ) + parser.add_argument( + "--input-dropout", + type=float, + default=0.7, + help="Dropout applied at input layer.", + ) + parser.add_argument( + "--layer-dropout", + type=float, + default=0.9, + help="Dropout applied at hidden layers.", + ) + parser.add_argument( + "--p", nargs="+", type=int, help="List of powers of adjacency matrix." 
+ ) parser.set_defaults(p=[0, 1, 2]) @@ -298,13 +366,13 @@ def main(args): for _ in range(100): acc_lists.append(main(args)) - + acc_lists.sort() acc_lists_top = np.array(acc_lists[50:]) mean = np.around(np.mean(acc_lists_top, axis=0), decimals=3) std = np.around(np.std(acc_lists_top, axis=0), decimals=3) - print('Total acc: ', acc_lists) - print('Top 50 acc:', acc_lists_top) - print('mean', mean) - print('std', std) + print("Total acc: ", acc_lists) + print("Top 50 acc:", acc_lists_top) + print("mean", mean) + print("std", std) diff --git a/examples/pytorch/model_zoo/citation_network/conf.py b/examples/pytorch/model_zoo/citation_network/conf.py index 05f53c2ed58f..a4bed295147c 100644 --- a/examples/pytorch/model_zoo/citation_network/conf.py +++ b/examples/pytorch/model_zoo/citation_network/conf.py @@ -2,55 +2,55 @@ import torch.nn.functional as F GCN_CONFIG = { - 'extra_args': [16, 1, F.relu, 0.5], - 'lr': 1e-2, - 'weight_decay': 5e-4, + "extra_args": [16, 1, F.relu, 0.5], + "lr": 1e-2, + "weight_decay": 5e-4, } GAT_CONFIG = { - 'extra_args': [8, 1, [8] * 1 + [1], F.elu, 0.6, 0.6, 0.2, False], - 'lr': 0.005, - 'weight_decay': 5e-4, + "extra_args": [8, 1, [8] * 1 + [1], F.elu, 0.6, 0.6, 0.2, False], + "lr": 0.005, + "weight_decay": 5e-4, } GRAPHSAGE_CONFIG = { - 'extra_args': [16, 1, F.relu, 0.5, 'gcn'], - 'lr': 1e-2, - 'weight_decay': 5e-4, + "extra_args": [16, 1, F.relu, 0.5, "gcn"], + "lr": 1e-2, + "weight_decay": 5e-4, } APPNP_CONFIG = { - 'extra_args': [64, 1, F.relu, 0.5, 0.5, 0.1, 10], - 'lr': 1e-2, - 'weight_decay': 5e-4, + "extra_args": [64, 1, F.relu, 0.5, 0.5, 0.1, 10], + "lr": 1e-2, + "weight_decay": 5e-4, } TAGCN_CONFIG = { - 'extra_args': [16, 1, F.relu, 0.5], - 'lr': 1e-2, - 'weight_decay': 5e-4, + "extra_args": [16, 1, F.relu, 0.5], + "lr": 1e-2, + "weight_decay": 5e-4, } AGNN_CONFIG = { - 'extra_args': [32, 2, 1.0, True, 0.5], - 'lr': 1e-2, - 'weight_decay': 5e-4, + "extra_args": [32, 2, 1.0, True, 0.5], + "lr": 1e-2, + "weight_decay": 5e-4, } SGC_CONFIG = { - 'extra_args': [None, 2, False], - 'lr': 0.2, - 'weight_decay': 5e-6, + "extra_args": [None, 2, False], + "lr": 0.2, + "weight_decay": 5e-6, } GIN_CONFIG = { - 'extra_args': [16, 1, 0, True], - 'lr': 1e-2, - 'weight_decay': 5e-6, + "extra_args": [16, 1, 0, True], + "lr": 1e-2, + "weight_decay": 5e-6, } CHEBNET_CONFIG = { - 'extra_args': [32, 1, 2, True], - 'lr': 1e-2, - 'weight_decay': 5e-4, + "extra_args": [32, 1, 2, True], + "lr": 1e-2, + "weight_decay": 5e-4, } diff --git a/examples/pytorch/model_zoo/citation_network/models.py b/examples/pytorch/model_zoo/citation_network/models.py index d22054a241e1..83293b6b9747 100644 --- a/examples/pytorch/model_zoo/citation_network/models.py +++ b/examples/pytorch/model_zoo/citation_network/models.py @@ -1,18 +1,23 @@ import torch import torch.nn as nn -from dgl.nn.pytorch import GraphConv, GATConv, SAGEConv, GINConv,\ - APPNPConv, TAGConv, SGConv, AGNNConv, ChebConv + +from dgl.nn.pytorch import ( + AGNNConv, + APPNPConv, + ChebConv, + GATConv, + GINConv, + GraphConv, + SAGEConv, + SGConv, + TAGConv, +) class GCN(nn.Module): - def __init__(self, - g, - in_feats, - n_classes, - n_hidden, - n_layers, - activation, - dropout): + def __init__( + self, g, in_feats, n_classes, n_hidden, n_layers, activation, dropout + ): super(GCN, self).__init__() self.g = g self.layers = nn.ModuleList() @@ -20,7 +25,9 @@ def __init__(self, self.layers.append(GraphConv(in_feats, n_hidden, activation=activation)) # hidden layers for i in range(n_layers - 1): - self.layers.append(GraphConv(n_hidden, 
n_hidden, activation=activation)) + self.layers.append( + GraphConv(n_hidden, n_hidden, activation=activation) + ) # output layer self.layers.append(GraphConv(n_hidden, n_classes)) self.dropout = nn.Dropout(p=dropout) @@ -35,37 +42,66 @@ def forward(self, features): class GAT(nn.Module): - def __init__(self, - g, - in_dim, - num_classes, - num_hidden, - num_layers, - heads, - activation, - feat_drop, - attn_drop, - negative_slope, - residual): + def __init__( + self, + g, + in_dim, + num_classes, + num_hidden, + num_layers, + heads, + activation, + feat_drop, + attn_drop, + negative_slope, + residual, + ): super(GAT, self).__init__() self.g = g self.num_layers = num_layers self.gat_layers = nn.ModuleList() self.activation = activation # input projection (no residual) - self.gat_layers.append(GATConv( - in_dim, num_hidden, heads[0], - feat_drop, attn_drop, negative_slope, False, self.activation)) + self.gat_layers.append( + GATConv( + in_dim, + num_hidden, + heads[0], + feat_drop, + attn_drop, + negative_slope, + False, + self.activation, + ) + ) # hidden layers for l in range(1, num_layers): # due to multi-head, the in_dim = num_hidden * num_heads - self.gat_layers.append(GATConv( - num_hidden * heads[l-1], num_hidden, heads[l], - feat_drop, attn_drop, negative_slope, residual, self.activation)) + self.gat_layers.append( + GATConv( + num_hidden * heads[l - 1], + num_hidden, + heads[l], + feat_drop, + attn_drop, + negative_slope, + residual, + self.activation, + ) + ) # output projection - self.gat_layers.append(GATConv( - num_hidden * heads[-2], num_classes, heads[-1], - feat_drop, attn_drop, negative_slope, residual, None)) + self.gat_layers.append( + GATConv( + num_hidden * heads[-2], + num_classes, + heads[-1], + feat_drop, + attn_drop, + negative_slope, + residual, + None, + ) + ) def forward(self, inputs): h = inputs @@ -77,26 +113,52 @@ def forward(self, inputs): class GraphSAGE(nn.Module): - def __init__(self, - g, - in_feats, - n_classes, - n_hidden, - n_layers, - activation, - dropout, - aggregator_type): + def __init__( + self, + g, + in_feats, + n_classes, + n_hidden, + n_layers, + activation, + dropout, + aggregator_type, + ): super(GraphSAGE, self).__init__() self.layers = nn.ModuleList() self.g = g # input layer - self.layers.append(SAGEConv(in_feats, n_hidden, aggregator_type, feat_drop=dropout, activation=activation)) + self.layers.append( + SAGEConv( + in_feats, + n_hidden, + aggregator_type, + feat_drop=dropout, + activation=activation, + ) + ) # hidden layers for i in range(n_layers - 1): - self.layers.append(SAGEConv(n_hidden, n_hidden, aggregator_type, feat_drop=dropout, activation=activation)) + self.layers.append( + SAGEConv( + n_hidden, + n_hidden, + aggregator_type, + feat_drop=dropout, + activation=activation, + ) + ) # output layer - self.layers.append(SAGEConv(n_hidden, n_classes, aggregator_type, feat_drop=dropout, activation=None)) # activation None + self.layers.append( + SAGEConv( + n_hidden, + n_classes, + aggregator_type, + feat_drop=dropout, + activation=None, + ) + ) # activation None def forward(self, features): h = features @@ -106,17 +168,19 @@ def forward(self, features): class APPNP(nn.Module): - def __init__(self, - g, - in_feats, - n_classes, - n_hidden, - n_layers, - activation, - feat_drop, - edge_drop, - alpha, - k): + def __init__( + self, + g, + in_feats, + n_classes, + n_hidden, + n_layers, + activation, + feat_drop, + edge_drop, + alpha, + k, + ): super(APPNP, self).__init__() self.g = g self.layers = nn.ModuleList() @@ -153,14 +217,9 @@ 
def forward(self, features): class TAGCN(nn.Module): - def __init__(self, - g, - in_feats, - n_classes, - n_hidden, - n_layers, - activation, - dropout): + def __init__( + self, g, in_feats, n_classes, n_hidden, n_layers, activation, dropout + ): super(TAGCN, self).__init__() self.g = g self.layers = nn.ModuleList() @@ -168,9 +227,11 @@ def __init__(self, self.layers.append(TAGConv(in_feats, n_hidden, activation=activation)) # hidden layers for i in range(n_layers - 1): - self.layers.append(TAGConv(n_hidden, n_hidden, activation=activation)) + self.layers.append( + TAGConv(n_hidden, n_hidden, activation=activation) + ) # output layer - self.layers.append(TAGConv(n_hidden, n_classes)) #activation=None + self.layers.append(TAGConv(n_hidden, n_classes)) # activation=None self.dropout = nn.Dropout(p=dropout) def forward(self, features): @@ -183,28 +244,27 @@ def forward(self, features): class AGNN(nn.Module): - def __init__(self, - g, - in_feats, - n_classes, - n_hidden, - n_layers, - init_beta, - learn_beta, - dropout): + def __init__( + self, + g, + in_feats, + n_classes, + n_hidden, + n_layers, + init_beta, + learn_beta, + dropout, + ): super(AGNN, self).__init__() self.g = g self.layers = nn.ModuleList( [AGNNConv(init_beta, learn_beta) for _ in range(n_layers)] ) self.proj = nn.Sequential( - nn.Dropout(dropout), - nn.Linear(in_feats, n_hidden), - nn.ReLU() + nn.Dropout(dropout), nn.Linear(in_feats, n_hidden), nn.ReLU() ) self.cls = nn.Sequential( - nn.Dropout(dropout), - nn.Linear(n_hidden, n_classes) + nn.Dropout(dropout), nn.Linear(n_hidden, n_classes) ) def forward(self, features): @@ -215,34 +275,19 @@ def forward(self, features): class SGC(nn.Module): - def __init__(self, - g, - in_feats, - n_classes, - n_hidden, - k, - bias): + def __init__(self, g, in_feats, n_classes, n_hidden, k, bias): super(SGC, self).__init__() self.g = g - self.net = SGConv(in_feats, - n_classes, - k=k, - cached=True, - bias=bias) + self.net = SGConv(in_feats, n_classes, k=k, cached=True, bias=bias) def forward(self, features): return self.net(self.g, features) class GIN(nn.Module): - def __init__(self, - g, - in_feats, - n_classes, - n_hidden, - n_layers, - init_eps, - learn_eps): + def __init__( + self, g, in_feats, n_classes, n_hidden, n_layers, init_eps, learn_eps + ): super(GIN, self).__init__() self.g = g self.layers = nn.ModuleList() @@ -253,9 +298,9 @@ def __init__(self, nn.Linear(in_feats, n_hidden), nn.ReLU(), ), - 'mean', + "mean", init_eps, - learn_eps + learn_eps, ) ) for i in range(n_layers - 1): @@ -264,11 +309,11 @@ def __init__(self, nn.Sequential( nn.Dropout(0.6), nn.Linear(n_hidden, n_hidden), - nn.ReLU() + nn.ReLU(), ), - 'mean', + "mean", init_eps, - learn_eps + learn_eps, ) ) self.layers.append( @@ -277,9 +322,9 @@ def __init__(self, nn.Dropout(0.6), nn.Linear(n_hidden, n_classes), ), - 'mean', + "mean", init_eps, - learn_eps + learn_eps, ) ) @@ -289,29 +334,17 @@ def forward(self, features): h = layer(self.g, h) return h + class ChebNet(nn.Module): - def __init__(self, - g, - in_feats, - n_classes, - n_hidden, - n_layers, - k, - bias): + def __init__(self, g, in_feats, n_classes, n_hidden, n_layers, k, bias): super(ChebNet, self).__init__() self.g = g self.layers = nn.ModuleList() - self.layers.append( - ChebConv(in_feats, n_hidden, k, bias=bias) - ) + self.layers.append(ChebConv(in_feats, n_hidden, k, bias=bias)) for _ in range(n_layers - 1): - self.layers.append( - ChebConv(n_hidden, n_hidden, k, bias=bias) - ) + self.layers.append(ChebConv(n_hidden, n_hidden, k, bias=bias)) - 
self.layers.append( - ChebConv(n_hidden, n_classes, k, bias=bias) - ) + self.layers.append(ChebConv(n_hidden, n_classes, k, bias=bias)) def forward(self, features): h = features diff --git a/examples/pytorch/model_zoo/citation_network/run.py b/examples/pytorch/model_zoo/citation_network/run.py index f3f03ff27803..d93b1a73b795 100644 --- a/examples/pytorch/model_zoo/citation_network/run.py +++ b/examples/pytorch/model_zoo/citation_network/run.py @@ -1,36 +1,40 @@ -import argparse, time +import argparse +import time + +import networkx as nx import numpy as np import torch import torch.nn as nn import torch.nn.functional as F -import dgl -from dgl.data import register_data_args, load_data -from models import * from conf import * -import networkx as nx +from models import * + +import dgl +from dgl.data import load_data, register_data_args def get_model_and_config(name): name = name.lower() - if name == 'gcn': + if name == "gcn": return GCN, GCN_CONFIG - elif name == 'gat': + elif name == "gat": return GAT, GAT_CONFIG - elif name == 'graphsage': + elif name == "graphsage": return GraphSAGE, GRAPHSAGE_CONFIG - elif name == 'appnp': + elif name == "appnp": return APPNP, APPNP_CONFIG - elif name == 'tagcn': + elif name == "tagcn": return TAGCN, TAGCN_CONFIG - elif name == 'agnn': + elif name == "agnn": return AGNN, AGNN_CONFIG - elif name == 'sgc': + elif name == "sgc": return SGC, SGC_CONFIG - elif name == 'gin': + elif name == "gin": return GIN, GIN_CONFIG - elif name == 'chebnet': + elif name == "chebnet": return ChebNet, CHEBNET_CONFIG + def evaluate(model, features, labels, mask): model.eval() with torch.no_grad(): @@ -41,6 +45,7 @@ def evaluate(model, features, labels, mask): correct = torch.sum(indices == labels) return correct.item() * 1.0 / len(labels) + def main(args): # load and preprocess dataset data = load_data(args) @@ -50,24 +55,29 @@ def main(args): else: cuda = True g = g.to(args.gpu) - features = g.ndata['feat'] - labels = g.ndata['label'] - train_mask = g.ndata['train_mask'] - val_mask = g.ndata['val_mask'] - test_mask = g.ndata['test_mask'] + features = g.ndata["feat"] + labels = g.ndata["label"] + train_mask = g.ndata["train_mask"] + val_mask = g.ndata["val_mask"] + test_mask = g.ndata["test_mask"] in_feats = features.shape[1] n_classes = data.num_labels n_edges = g.number_of_edges() - print("""----Data statistics------' + print( + """----Data statistics------' #Edges %d #Classes %d #Train samples %d #Val samples %d - #Test samples %d""" % - (n_edges, n_classes, - train_mask.int().sum().item(), - val_mask.int().sum().item(), - test_mask.int().sum().item())) + #Test samples %d""" + % ( + n_edges, + n_classes, + train_mask.int().sum().item(), + val_mask.int().sum().item(), + test_mask.int().sum().item(), + ) + ) # graph preprocess and calculate normalization factor # add self loop @@ -79,14 +89,11 @@ def main(args): degs = g.in_degrees().float() norm = torch.pow(degs, -0.5) norm[torch.isinf(norm)] = 0 - g.ndata['norm'] = norm.unsqueeze(1) + g.ndata["norm"] = norm.unsqueeze(1) # create GCN model GNN, config = get_model_and_config(args.model) - model = GNN(g, - in_feats, - n_classes, - *config['extra_args']) + model = GNN(g, in_feats, n_classes, *config["extra_args"]) if cuda: model = model.cuda() @@ -96,9 +103,9 @@ def main(args): loss_fcn = torch.nn.CrossEntropyLoss() # use optimizer - optimizer = torch.optim.Adam(model.parameters(), - lr=config['lr'], - weight_decay=config['weight_decay']) + optimizer = torch.optim.Adam( + model.parameters(), lr=config["lr"], 
weight_decay=config["weight_decay"] + ) # initialize graph dur = [] @@ -118,25 +125,40 @@ def main(args): dur.append(time.time() - t0) acc = evaluate(model, features, labels, val_mask) - print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | " - "ETputs(KTEPS) {:.2f}". format(epoch, np.mean(dur), loss.item(), - acc, n_edges / np.mean(dur) / 1000)) + print( + "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | " + "ETputs(KTEPS) {:.2f}".format( + epoch, + np.mean(dur), + loss.item(), + acc, + n_edges / np.mean(dur) / 1000, + ) + ) print() acc = evaluate(model, features, labels, test_mask) print("Test Accuracy {:.4f}".format(acc)) -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Node classification on citation networks.') +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Node classification on citation networks." + ) register_data_args(parser) - parser.add_argument("--model", type=str, default='gcn', - help='model to use, available models are gcn, gat, graphsage, gin,' - 'appnp, tagcn, sgc, agnn') - parser.add_argument("--gpu", type=int, default=-1, - help="gpu") - parser.add_argument("--self-loop", action='store_true', - help="graph self-loop (default=False)") + parser.add_argument( + "--model", + type=str, + default="gcn", + help="model to use, available models are gcn, gat, graphsage, gin," + "appnp, tagcn, sgc, agnn", + ) + parser.add_argument("--gpu", type=int, default=-1, help="gpu") + parser.add_argument( + "--self-loop", + action="store_true", + help="graph self-loop (default=False)", + ) args = parser.parse_args() print(args) main(args) diff --git a/examples/pytorch/model_zoo/geometric/coarsening.py b/examples/pytorch/model_zoo/geometric/coarsening.py index 57821bcf391f..307734554852 100644 --- a/examples/pytorch/model_zoo/geometric/coarsening.py +++ b/examples/pytorch/model_zoo/geometric/coarsening.py @@ -31,7 +31,7 @@ def laplacian(W, normalized=True): def rescale_L(L, lmax=2): """Rescale Laplacian eigenvalues to [-1,1]""" M, M = L.shape - I = scipy.sparse.identity(M, format='csr', dtype=L.dtype) + I = scipy.sparse.identity(M, format="csr", dtype=L.dtype) L /= lmax * 2 L -= I return L @@ -39,7 +39,9 @@ def rescale_L(L, lmax=2): def lmax_L(L): """Compute largest Laplacian eigenvalue""" - return scipy.sparse.linalg.eigsh(L, k=1, which='LM', return_eigenvectors=False)[0] + return scipy.sparse.linalg.eigsh( + L, k=1, which="LM", return_eigenvectors=False + )[0] # graph coarsening with Heavy Edge Matching @@ -57,7 +59,11 @@ def coarsen(A, levels): A = A.tocsr() A.eliminate_zeros() Mnew, Mnew = A.shape - print('Layer {0}: M_{0} = |V| = {1} nodes ({2} added), |E| = {3} edges'.format(i, Mnew, Mnew - M, A.nnz // 2)) + print( + "Layer {0}: M_{0} = |V| = {1} nodes ({2} added), |E| = {3} edges".format( + i, Mnew, Mnew - M, A.nnz // 2 + ) + ) L = laplacian(A, normalized=True) laplacians.append(L) @@ -95,7 +101,7 @@ def HEM(W, levels, rid=None): graphs = [] graphs.append(W) - print('Heavy Edge Matching coarsening with Xavier version') + print("Heavy Edge Matching coarsening with Xavier version") for _ in range(levels): @@ -183,7 +189,9 @@ def HEM_one_level(rr, cc, vv, rid, weights): # First approach if 2 == 1: - tval = vv[rs + jj] * (1.0 / weights[tid] + 1.0 / weights[nid]) + tval = vv[rs + jj] * ( + 1.0 / weights[tid] + 1.0 / weights[nid] + ) # Second approach if 1 == 1: @@ -192,7 +200,7 @@ def HEM_one_level(rr, cc, vv, rid, weights): Wjj = vv[rowstart[nid]] di = weights[tid] dj = weights[nid] - tval = (2. 
* Wij + Wii + Wjj) * 1. / (di + dj + 1e-9) + tval = (2.0 * Wij + Wii + Wjj) * 1.0 / (di + dj + 1e-9) if tval > wmax: wmax = tval @@ -247,7 +255,7 @@ def compute_perm(parents): # Sanity checks. for i, indices_layer in enumerate(indices): - M = M_last * 2 ** i + M = M_last * 2**i # Reduction by 2 at each layer (binary tree). assert len(indices[0] == M) # The new ordering does not omit an indice. @@ -256,8 +264,9 @@ def compute_perm(parents): return indices[::-1] -assert (compute_perm([np.array([4, 1, 1, 2, 2, 3, 0, 0, 3]), np.array([2, 1, 0, 1, 0])]) - == [[3, 4, 0, 9, 1, 2, 5, 8, 6, 7, 10, 11], [2, 4, 1, 3, 0, 5], [0, 1, 2]]) +assert compute_perm( + [np.array([4, 1, 1, 2, 2, 3, 0, 0, 3]), np.array([2, 1, 0, 1, 0])] +) == [[3, 4, 0, 9, 1, 2, 5, 8, 6, 7, 10, 11], [2, 4, 1, 3, 0, 5], [0, 1, 2]] def perm_adjacency(A, indices): diff --git a/examples/pytorch/model_zoo/geometric/coordinate.py b/examples/pytorch/model_zoo/geometric/coordinate.py index cdd473a84890..44cb0a2c99a8 100644 --- a/examples/pytorch/model_zoo/geometric/coordinate.py +++ b/examples/pytorch/model_zoo/geometric/coordinate.py @@ -2,6 +2,8 @@ """Compute x,y coordinate for nodes in the graph""" eps = 1e-8 + + def get_coordinates(graphs, grid_side, coarsening_levels, perm): rst = [] for l in range(coarsening_levels + 1): @@ -10,21 +12,25 @@ def get_coordinates(graphs, grid_side, coarsening_levels, perm): cnt = eps x_accum = 0 y_accum = 0 - for j in range(i * 2 ** l, (i + 1) * 2 ** l): - if perm[j] < grid_side ** 2: - x_accum += (perm[j] // grid_side) - y_accum += (perm[j] % grid_side) + for j in range(i * 2**l, (i + 1) * 2**l): + if perm[j] < grid_side**2: + x_accum += perm[j] // grid_side + y_accum += perm[j] % grid_side cnt += 1 xs.append(x_accum / cnt) ys.append(y_accum / cnt) - rst.append(th.cat([th.tensor(xs).view(-1, 1), th.tensor(ys).view(-1, 1)], -1)) + rst.append( + th.cat([th.tensor(xs).view(-1, 1), th.tensor(ys).view(-1, 1)], -1) + ) return rst + """Cartesian coordinate to polar coordinate""" + + def z2polar(edges): - z = edges.dst['xy'] - edges.src['xy'] + z = edges.dst["xy"] - edges.src["xy"] rho = th.norm(z, dim=-1, p=2) x, y = z.unbind(dim=-1) phi = th.atan2(y, x) - return {'u': th.cat([rho.unsqueeze(-1), phi.unsqueeze(-1)], -1)} - + return {"u": th.cat([rho.unsqueeze(-1), phi.unsqueeze(-1)], -1)} diff --git a/examples/pytorch/model_zoo/geometric/grid_graph.py b/examples/pytorch/model_zoo/geometric/grid_graph.py index 74f36d255a13..5b5c1f6e94c9 100644 --- a/examples/pytorch/model_zoo/geometric/grid_graph.py +++ b/examples/pytorch/model_zoo/geometric/grid_graph.py @@ -1,41 +1,42 @@ # author: xbresson # code link: https://github.com/xbresson/CE7454_2019/blob/master/codes/labs_lecture14/lab01_ChebGCNs/lib/grid_graph.py +import numpy as np +import scipy.sparse # scipy.spatial.distance +import scipy.sparse.linalg import sklearn import sklearn.metrics -import scipy.sparse, scipy.sparse.linalg # scipy.spatial.distance -import numpy as np -def grid_graph(grid_side,number_edges,metric): +def grid_graph(grid_side, number_edges, metric): """Generate graph of a grid""" z = grid(grid_side) dist, idx = distance_sklearn_metrics(z, k=number_edges, metric=metric) A = adjacency(dist, idx) - print("nb edges: ",A.nnz) + print("nb edges: ", A.nnz) return A def grid(m, dtype=np.float32): """Return coordinates of grid points""" M = m**2 - x = np.linspace(0,1,m, dtype=dtype) - y = np.linspace(0,1,m, dtype=dtype) + x = np.linspace(0, 1, m, dtype=dtype) + y = np.linspace(0, 1, m, dtype=dtype) xx, yy = np.meshgrid(x, y) - z = np.empty((M,2), 
dtype) - z[:,0] = xx.reshape(M) - z[:,1] = yy.reshape(M) + z = np.empty((M, 2), dtype) + z[:, 0] = xx.reshape(M) + z[:, 1] = yy.reshape(M) return z -def distance_sklearn_metrics(z, k=4, metric='euclidean'): +def distance_sklearn_metrics(z, k=4, metric="euclidean"): """Compute pairwise distances""" - #d = sklearn.metrics.pairwise.pairwise_distances(z, metric=metric, n_jobs=-2) + # d = sklearn.metrics.pairwise.pairwise_distances(z, metric=metric, n_jobs=-2) d = sklearn.metrics.pairwise.pairwise_distances(z, metric=metric, n_jobs=1) # k-NN - idx = np.argsort(d)[:,1:k+1] + idx = np.argsort(d)[:, 1 : k + 1] d.sort() - d = d[:,1:k+1] + d = d[:, 1 : k + 1] return d, idx @@ -47,13 +48,13 @@ def adjacency(dist, idx): assert dist.max() <= 1 # Pairwise distances - sigma2 = np.mean(dist[:,-1])**2 - dist = np.exp(- dist**2 / sigma2) + sigma2 = np.mean(dist[:, -1]) ** 2 + dist = np.exp(-(dist**2) / sigma2) # Weight matrix I = np.arange(0, M).repeat(k) - J = idx.reshape(M*k) - V = dist.reshape(M*k) + J = idx.reshape(M * k) + V = dist.reshape(M * k) W = scipy.sparse.coo_matrix((V, (I, J)), shape=(M, M)) # No self-connections diff --git a/examples/pytorch/model_zoo/geometric/mnist.py b/examples/pytorch/model_zoo/geometric/mnist.py index ad1765341122..ea5dfe913693 100644 --- a/examples/pytorch/model_zoo/geometric/mnist.py +++ b/examples/pytorch/model_zoo/geometric/mnist.py @@ -1,32 +1,35 @@ import argparse import time -import numpy as np + import networkx as nx +import numpy as np import torch import torch.nn as nn import torch.nn.functional as F -import dgl +from coarsening import coarsen +from coordinate import get_coordinates, z2polar +from grid_graph import grid_graph from torch.utils.data import DataLoader from torchvision import datasets, transforms -from dgl.data import register_data_args, load_data + +import dgl +from dgl.data import load_data, register_data_args from dgl.nn.pytorch.conv import ChebConv, GMMConv from dgl.nn.pytorch.glob import MaxPooling -from grid_graph import grid_graph -from coarsening import coarsen -from coordinate import get_coordinates, z2polar argparser = argparse.ArgumentParser("MNIST") -argparser.add_argument("--gpu", type=int, default=-1, - help="gpu id, use cpu if set to -1") -argparser.add_argument("--model", type=str, default="chebnet", - help="model to use, chebnet/monet") -argparser.add_argument("--batch-size", type=int, default=100, - help="batch size") +argparser.add_argument( + "--gpu", type=int, default=-1, help="gpu id, use cpu if set to -1" +) +argparser.add_argument( + "--model", type=str, default="chebnet", help="model to use, chebnet/monet" +) +argparser.add_argument("--batch-size", type=int, default=100, help="batch size") args = argparser.parse_args() grid_side = 28 number_edges = 8 -metric = 'euclidean' +metric = "euclidean" A = grid_graph(28, 8, metric) @@ -35,18 +38,25 @@ g_arr = [dgl.from_scipy(csr) for csr in L] coordinate_arr = get_coordinates(g_arr, grid_side, coarsening_levels, perm) -str_to_torch_dtype = {'float16':torch.half, 'float32':torch.float32, 'float64':torch.float64} -coordinate_arr = [coord.to(dtype=str_to_torch_dtype[str(A.dtype)]) for coord in coordinate_arr] +str_to_torch_dtype = { + "float16": torch.half, + "float32": torch.float32, + "float64": torch.float64, +} +coordinate_arr = [ + coord.to(dtype=str_to_torch_dtype[str(A.dtype)]) for coord in coordinate_arr +] for g, coordinate_arr in zip(g_arr, coordinate_arr): - g.ndata['xy'] = coordinate_arr + g.ndata["xy"] = coordinate_arr g.apply_edges(z2polar) + def batcher(batch): g_batch = 
[[] for _ in range(coarsening_levels + 1)] x_batch = [] y_batch = [] for x, y in batch: - x = torch.cat([x.view(-1), x.new_zeros(len(perm) - 28 ** 2)], 0) + x = torch.cat([x.view(-1), x.new_zeros(len(perm) - 28**2)], 0) x = x[perm] x_batch.append(x) y_batch.append(y) @@ -58,87 +68,98 @@ def batcher(batch): g_batch = [dgl.batch(g) for g in g_batch] return g_batch, x_batch, y_batch -trainset = datasets.MNIST(root='.', train=True, download=True, transform=transforms.ToTensor()) -testset = datasets.MNIST(root='.', train=False, download=True, transform=transforms.ToTensor()) -train_loader = DataLoader(trainset, - batch_size=args.batch_size, - shuffle=True, - collate_fn=batcher, - num_workers=6) -test_loader = DataLoader(testset, - batch_size=args.batch_size, - shuffle=False, - collate_fn=batcher, - num_workers=6) +trainset = datasets.MNIST( + root=".", train=True, download=True, transform=transforms.ToTensor() +) +testset = datasets.MNIST( + root=".", train=False, download=True, transform=transforms.ToTensor() +) + +train_loader = DataLoader( + trainset, + batch_size=args.batch_size, + shuffle=True, + collate_fn=batcher, + num_workers=6, +) +test_loader = DataLoader( + testset, + batch_size=args.batch_size, + shuffle=False, + collate_fn=batcher, + num_workers=6, +) + class MoNet(nn.Module): - def __init__(self, - n_kernels, - in_feats, - hiddens, - out_feats): + def __init__(self, n_kernels, in_feats, hiddens, out_feats): super(MoNet, self).__init__() self.pool = nn.MaxPool1d(2) self.layers = nn.ModuleList() self.readout = MaxPooling() # Input layer - self.layers.append( - GMMConv(in_feats, hiddens[0], 2, n_kernels)) + self.layers.append(GMMConv(in_feats, hiddens[0], 2, n_kernels)) # Hidden layer for i in range(1, len(hiddens)): - self.layers.append(GMMConv(hiddens[i - 1], hiddens[i], 2, n_kernels)) + self.layers.append( + GMMConv(hiddens[i - 1], hiddens[i], 2, n_kernels) + ) self.cls = nn.Sequential( - nn.Linear(hiddens[-1], out_feats), - nn.LogSoftmax() + nn.Linear(hiddens[-1], out_feats), nn.LogSoftmax() ) def forward(self, g_arr, feat): for g, layer in zip(g_arr, self.layers): - u = g.edata['u'] - feat = self.pool(layer(g, feat, u).transpose(-1, -2).unsqueeze(0))\ - .squeeze(0).transpose(-1, -2) + u = g.edata["u"] + feat = ( + self.pool(layer(g, feat, u).transpose(-1, -2).unsqueeze(0)) + .squeeze(0) + .transpose(-1, -2) + ) return self.cls(self.readout(g_arr[-1], feat)) + class ChebNet(nn.Module): - def __init__(self, - k, - in_feats, - hiddens, - out_feats): + def __init__(self, k, in_feats, hiddens, out_feats): super(ChebNet, self).__init__() self.pool = nn.MaxPool1d(2) self.layers = nn.ModuleList() self.readout = MaxPooling() # Input layer - self.layers.append( - ChebConv(in_feats, hiddens[0], k)) + self.layers.append(ChebConv(in_feats, hiddens[0], k)) for i in range(1, len(hiddens)): - self.layers.append( - ChebConv(hiddens[i - 1], hiddens[i], k)) + self.layers.append(ChebConv(hiddens[i - 1], hiddens[i], k)) self.cls = nn.Sequential( - nn.Linear(hiddens[-1], out_feats), - nn.LogSoftmax() + nn.Linear(hiddens[-1], out_feats), nn.LogSoftmax() ) def forward(self, g_arr, feat): for g, layer in zip(g_arr, self.layers): - feat = self.pool(layer(g, feat, [2] * g.batch_size).transpose(-1, -2).unsqueeze(0))\ - .squeeze(0).transpose(-1, -2) + feat = ( + self.pool( + layer(g, feat, [2] * g.batch_size) + .transpose(-1, -2) + .unsqueeze(0) + ) + .squeeze(0) + .transpose(-1, -2) + ) return self.cls(self.readout(g_arr[-1], feat)) + if args.gpu == -1: - device = torch.device('cpu') + device = 
torch.device("cpu") else: device = torch.device(args.gpu) -if args.model == 'chebnet': +if args.model == "chebnet": model = ChebNet(2, 1, [32, 64, 128, 256], 10) else: model = MoNet(10, 1, [32, 64, 128, 256], 10) @@ -149,7 +170,7 @@ def forward(self, g_arr, feat): log_interval = 50 for epoch in range(10): - print('epoch {} starts'.format(epoch)) + print("epoch {} starts".format(epoch)) model.train() hit, tot = 0, 0 loss_accum = 0 @@ -164,7 +185,9 @@ def forward(self, g_arr, feat): loss_accum += loss.item() if (i + 1) % log_interval == 0: - print('loss: {}, acc: {}'.format(loss_accum / log_interval, hit / tot)) + print( + "loss: {}, acc: {}".format(loss_accum / log_interval, hit / tot) + ) hit, tot = 0, 0 loss_accum = 0 @@ -182,4 +205,4 @@ def forward(self, g_arr, feat): hit += (out.max(-1)[1] == y).sum().item() tot += len(y) - print('test acc: ', hit / tot) + print("test acc: ", hit / tot) diff --git a/examples/pytorch/monet/citation.py b/examples/pytorch/monet/citation.py index 2a715ac7545c..feb314e8124f 100644 --- a/examples/pytorch/monet/citation.py +++ b/examples/pytorch/monet/citation.py @@ -1,46 +1,46 @@ import argparse import time -import numpy as np + import networkx as nx +import numpy as np import torch import torch.nn as nn import torch.nn.functional as F + from dgl import DGLGraph -from dgl.data import register_data_args, load_data +from dgl.data import load_data, register_data_args from dgl.nn.pytorch.conv import GMMConv class MoNet(nn.Module): - def __init__(self, - g, - in_feats, - n_hidden, - out_feats, - n_layers, - dim, - n_kernels, - dropout): + def __init__( + self, + g, + in_feats, + n_hidden, + out_feats, + n_layers, + dim, + n_kernels, + dropout, + ): super(MoNet, self).__init__() self.g = g self.layers = nn.ModuleList() self.pseudo_proj = nn.ModuleList() # Input layer - self.layers.append( - GMMConv(in_feats, n_hidden, dim, n_kernels)) - self.pseudo_proj.append( - nn.Sequential(nn.Linear(2, dim), nn.Tanh())) + self.layers.append(GMMConv(in_feats, n_hidden, dim, n_kernels)) + self.pseudo_proj.append(nn.Sequential(nn.Linear(2, dim), nn.Tanh())) # Hidden layer for _ in range(n_layers - 1): self.layers.append(GMMConv(n_hidden, n_hidden, dim, n_kernels)) - self.pseudo_proj.append( - nn.Sequential(nn.Linear(2, dim), nn.Tanh())) + self.pseudo_proj.append(nn.Sequential(nn.Linear(2, dim), nn.Tanh())) # Output layer self.layers.append(GMMConv(n_hidden, out_feats, dim, n_kernels)) - self.pseudo_proj.append( - nn.Sequential(nn.Linear(2, dim), nn.Tanh())) + self.pseudo_proj.append(nn.Sequential(nn.Linear(2, dim), nn.Tanh())) self.dropout = nn.Dropout(dropout) def forward(self, feat, pseudo): @@ -48,10 +48,10 @@ def forward(self, feat, pseudo): for i in range(len(self.layers)): if i != 0: h = self.dropout(h) - h = self.layers[i]( - self.g, h, self.pseudo_proj[i](pseudo)) + h = self.layers[i](self.g, h, self.pseudo_proj[i](pseudo)) return h + def evaluate(model, features, pseudo, labels, mask): model.eval() with torch.no_grad(): @@ -62,6 +62,7 @@ def evaluate(model, features, pseudo, labels, mask): correct = torch.sum(indices == labels) return correct.item() * 1.0 / len(labels) + def main(args): # load and preprocess dataset data = load_data(args) @@ -71,49 +72,59 @@ def main(args): else: cuda = True g = g.to(args.gpu) - features = g.ndata['feat'] - labels = g.ndata['label'] - train_mask = g.ndata['train_mask'] - val_mask = g.ndata['val_mask'] - test_mask = g.ndata['test_mask'] + features = g.ndata["feat"] + labels = g.ndata["label"] + train_mask = g.ndata["train_mask"] + val_mask 
= g.ndata["val_mask"] + test_mask = g.ndata["test_mask"] in_feats = features.shape[1] n_classes = data.num_labels n_edges = g.number_of_edges() - print("""----Data statistics------' + print( + """----Data statistics------' #Edges %d #Classes %d #Train samples %d #Val samples %d - #Test samples %d""" % - (n_edges, n_classes, - train_mask.sum().item(), - val_mask.sum().item(), - test_mask.sum().item())) + #Test samples %d""" + % ( + n_edges, + n_classes, + train_mask.sum().item(), + val_mask.sum().item(), + test_mask.sum().item(), + ) + ) # graph preprocess and calculate normalization factor g = g.remove_self_loop().add_self_loop() n_edges = g.number_of_edges() - us, vs = g.edges(order='eid') - udeg, vdeg = 1 / torch.sqrt(g.in_degrees(us).float()), 1 / torch.sqrt(g.in_degrees(vs).float()) + us, vs = g.edges(order="eid") + udeg, vdeg = 1 / torch.sqrt(g.in_degrees(us).float()), 1 / torch.sqrt( + g.in_degrees(vs).float() + ) pseudo = torch.cat([udeg.unsqueeze(1), vdeg.unsqueeze(1)], dim=1) # create GraphSAGE model - model = MoNet(g, - in_feats, - args.n_hidden, - n_classes, - args.n_layers, - args.pseudo_dim, - args.n_kernels, - args.dropout - ) + model = MoNet( + g, + in_feats, + args.n_hidden, + n_classes, + args.n_layers, + args.pseudo_dim, + args.n_kernels, + args.dropout, + ) if cuda: model.cuda() loss_fcn = torch.nn.CrossEntropyLoss() # use optimizer - optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) + optimizer = torch.optim.Adam( + model.parameters(), lr=args.lr, weight_decay=args.weight_decay + ) # initialize graph dur = [] @@ -133,36 +144,54 @@ def main(args): dur.append(time.time() - t0) acc = evaluate(model, features, pseudo, labels, val_mask) - print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | " - "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(), - acc, n_edges / np.mean(dur) / 1000)) + print( + "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | " + "ETputs(KTEPS) {:.2f}".format( + epoch, + np.mean(dur), + loss.item(), + acc, + n_edges / np.mean(dur) / 1000, + ) + ) print() acc = evaluate(model, features, pseudo, labels, test_mask) print("Test Accuracy {:.4f}".format(acc)) -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='MoNet on citation network') +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="MoNet on citation network") register_data_args(parser) - parser.add_argument("--dropout", type=float, default=0.5, - help="dropout probability") - parser.add_argument("--gpu", type=int, default=-1, - help="gpu") - parser.add_argument("--lr", type=float, default=1e-2, - help="learning rate") - parser.add_argument("--n-epochs", type=int, default=200, - help="number of training epochs") - parser.add_argument("--n-hidden", type=int, default=16, - help="number of hidden gcn units") - parser.add_argument("--n-layers", type=int, default=1, - help="number of hidden gcn layers") - parser.add_argument("--pseudo-dim", type=int, default=2, - help="Pseudo coordinate dimensions in GMMConv, 2 for cora and 3 for pubmed") - parser.add_argument("--n-kernels", type=int, default=3, - help="Number of kernels in GMMConv layer") - parser.add_argument("--weight-decay", type=float, default=5e-4, - help="Weight for L2 loss") + parser.add_argument( + "--dropout", type=float, default=0.5, help="dropout probability" + ) + parser.add_argument("--gpu", type=int, default=-1, help="gpu") + parser.add_argument("--lr", type=float, default=1e-2, help="learning rate") + 
parser.add_argument( + "--n-epochs", type=int, default=200, help="number of training epochs" + ) + parser.add_argument( + "--n-hidden", type=int, default=16, help="number of hidden gcn units" + ) + parser.add_argument( + "--n-layers", type=int, default=1, help="number of hidden gcn layers" + ) + parser.add_argument( + "--pseudo-dim", + type=int, + default=2, + help="Pseudo coordinate dimensions in GMMConv, 2 for cora and 3 for pubmed", + ) + parser.add_argument( + "--n-kernels", + type=int, + default=3, + help="Number of kernels in GMMConv layer", + ) + parser.add_argument( + "--weight-decay", type=float, default=5e-4, help="Weight for L2 loss" + ) args = parser.parse_args() print(args) diff --git a/examples/pytorch/multigpu/multi_gpu_graph_prediction.py b/examples/pytorch/multigpu/multi_gpu_graph_prediction.py index fc20a55aa0e3..dd525a03154d 100644 --- a/examples/pytorch/multigpu/multi_gpu_graph_prediction.py +++ b/examples/pytorch/multigpu/multi_gpu_graph_prediction.py @@ -1,16 +1,19 @@ +import argparse + import torch +import torch.distributed as dist import torch.nn as nn import torch.nn.functional as F -import torch.distributed as dist import torch.optim as optim +from ogb.graphproppred import DglGraphPropPredDataset, Evaluator +from ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder +from tqdm import tqdm + import dgl import dgl.nn as dglnn from dgl.data import AsGraphPredDataset from dgl.dataloading import GraphDataLoader -from ogb.graphproppred import DglGraphPropPredDataset, Evaluator -from ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder -from tqdm import tqdm -import argparse + class MLP(nn.Module): def __init__(self, in_feats): @@ -20,25 +23,27 @@ def __init__(self, in_feats): nn.BatchNorm1d(2 * in_feats), nn.ReLU(), nn.Linear(2 * in_feats, in_feats), - nn.BatchNorm1d(in_feats) + nn.BatchNorm1d(in_feats), ) def forward(self, h): return self.mlp(h) + class GIN(nn.Module): def __init__(self, n_hidden, n_output, n_layers=5): super().__init__() self.node_encoder = AtomEncoder(n_hidden) - self.edge_encoders = nn.ModuleList([ - BondEncoder(n_hidden) for _ in range(n_layers)]) + self.edge_encoders = nn.ModuleList( + [BondEncoder(n_hidden) for _ in range(n_layers)] + ) self.pool = dglnn.AvgPooling() self.dropout = nn.Dropout(0.5) self.layers = nn.ModuleList() for _ in range(n_layers): - self.layers.append(dglnn.GINEConv(MLP(n_hidden), learn_eps=True)) - self.predictor = nn.Linear(n_hidden, n_output) + self.layers.append(dglnn.GINEConv(MLP(n_hidden), learn_eps=True)) + self.predictor = nn.Linear(n_hidden, n_output) # add virtual node self.virtual_emb = nn.Embedding(1, n_hidden) @@ -65,6 +70,7 @@ def forward(self, g, x, x_e): hn = self.pool(g, hn) return self.predictor(hn) + @torch.no_grad() def evaluate(dataloader, device, model, evaluator): model.eval() @@ -72,17 +78,23 @@ def evaluate(dataloader, device, model, evaluator): y_pred = [] for batched_graph, labels in tqdm(dataloader): batched_graph, labels = batched_graph.to(device), labels.to(device) - node_feat, edge_feat = batched_graph.ndata['feat'], batched_graph.edata['feat'] + node_feat, edge_feat = ( + batched_graph.ndata["feat"], + batched_graph.edata["feat"], + ) y_hat = model(batched_graph, node_feat, edge_feat) y_true.append(labels.view(y_hat.shape).detach().cpu()) - y_pred.append(y_hat.detach().cpu()) + y_pred.append(y_hat.detach().cpu()) y_true = torch.cat(y_true, dim=0).numpy() y_pred = torch.cat(y_pred, dim=0).numpy() - input_dict = {'y_true': y_true, 'y_pred': y_pred} + input_dict = {"y_true": 
y_true, "y_pred": y_pred} return evaluator.eval(input_dict) + def train(rank, world_size, dataset_name, root): - dist.init_process_group('nccl', 'tcp://127.0.0.1:12347', world_size=world_size, rank=rank) + dist.init_process_group( + "nccl", "tcp://127.0.0.1:12347", world_size=world_size, rank=rank + ) torch.cuda.set_device(rank) dataset = AsGraphPredDataset(DglGraphPropPredDataset(dataset_name, root)) @@ -94,48 +106,62 @@ def train(rank, world_size, dataset_name, root): scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5) train_dataloader = GraphDataLoader( - dataset[dataset.train_idx], batch_size=256, - use_ddp=True, shuffle=True) - valid_dataloader = GraphDataLoader( - dataset[dataset.val_idx], batch_size=256) - test_dataloader = GraphDataLoader( - dataset[dataset.test_idx], batch_size=256) + dataset[dataset.train_idx], batch_size=256, use_ddp=True, shuffle=True + ) + valid_dataloader = GraphDataLoader(dataset[dataset.val_idx], batch_size=256) + test_dataloader = GraphDataLoader(dataset[dataset.test_idx], batch_size=256) for epoch in range(50): model.train() train_dataloader.set_epoch(epoch) for batched_graph, labels in train_dataloader: batched_graph, labels = batched_graph.to(rank), labels.to(rank) - node_feat, edge_feat = batched_graph.ndata['feat'], batched_graph.edata['feat'] + node_feat, edge_feat = ( + batched_graph.ndata["feat"], + batched_graph.edata["feat"], + ) logits = model(batched_graph, node_feat, edge_feat) optimizer.zero_grad() is_labeled = labels == labels - loss = F.binary_cross_entropy_with_logits(logits.float()[is_labeled], labels.float()[is_labeled]) + loss = F.binary_cross_entropy_with_logits( + logits.float()[is_labeled], labels.float()[is_labeled] + ) loss.backward() optimizer.step() scheduler.step() if rank == 0: - val_metric = evaluate(valid_dataloader, rank, model.module, evaluator)[evaluator.eval_metric] - test_metric = evaluate(test_dataloader, rank, model.module, evaluator)[evaluator.eval_metric] - - print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, ' - f'Val: {val_metric:.4f}, Test: {test_metric:.4f}') + val_metric = evaluate( + valid_dataloader, rank, model.module, evaluator + )[evaluator.eval_metric] + test_metric = evaluate( + test_dataloader, rank, model.module, evaluator + )[evaluator.eval_metric] + + print( + f"Epoch: {epoch:03d}, Loss: {loss:.4f}, " + f"Val: {val_metric:.4f}, Test: {test_metric:.4f}" + ) dist.destroy_process_group() -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument('--dataset', type=str, default="ogbg-molhiv", - choices=['ogbg-molhiv', 'ogbg-molpcba'], - help='name of dataset (default: ogbg-molhiv)') + parser.add_argument( + "--dataset", + type=str, + default="ogbg-molhiv", + choices=["ogbg-molhiv", "ogbg-molpcba"], + help="name of dataset (default: ogbg-molhiv)", + ) dataset_name = parser.parse_args().dataset - root = './data/OGB' + root = "./data/OGB" DglGraphPropPredDataset(dataset_name, root) world_size = torch.cuda.device_count() - print('Let\'s use', world_size, 'GPUs!') + print("Let's use", world_size, "GPUs!") args = (world_size, dataset_name, root) import torch.multiprocessing as mp + mp.spawn(train, args=args, nprocs=world_size, join=True) diff --git a/examples/pytorch/multigpu/multi_gpu_node_classification.py b/examples/pytorch/multigpu/multi_gpu_node_classification.py index aeda767d9109..4a5ff6d7951d 100644 --- a/examples/pytorch/multigpu/multi_gpu_node_classification.py +++ b/examples/pytorch/multigpu/multi_gpu_node_classification.py @@ 
-1,27 +1,34 @@ +import argparse import os + import torch +import torch.distributed as dist +import torch.multiprocessing as mp import torch.nn as nn import torch.nn.functional as F import torchmetrics.functional as MF -import torch.distributed as dist +import tqdm +from ogb.nodeproppred import DglNodePropPredDataset from torch.nn.parallel import DistributedDataParallel -import torch.multiprocessing as mp + import dgl.nn as dglnn -from dgl.multiprocessing import shared_tensor from dgl.data import AsNodePredDataset -from dgl.dataloading import DataLoader, NeighborSampler, MultiLayerFullNeighborSampler -from ogb.nodeproppred import DglNodePropPredDataset -import tqdm -import argparse +from dgl.dataloading import ( + DataLoader, + MultiLayerFullNeighborSampler, + NeighborSampler, +) +from dgl.multiprocessing import shared_tensor + class SAGE(nn.Module): def __init__(self, in_size, hid_size, out_size): super().__init__() self.layers = nn.ModuleList() # three-layer GraphSAGE-mean - self.layers.append(dglnn.SAGEConv(in_size, hid_size, 'mean')) - self.layers.append(dglnn.SAGEConv(hid_size, hid_size, 'mean')) - self.layers.append(dglnn.SAGEConv(hid_size, out_size, 'mean')) + self.layers.append(dglnn.SAGEConv(in_size, hid_size, "mean")) + self.layers.append(dglnn.SAGEConv(hid_size, hid_size, "mean")) + self.layers.append(dglnn.SAGEConv(hid_size, out_size, "mean")) self.dropout = nn.Dropout(0.5) self.hid_size = hid_size self.out_size = out_size @@ -36,21 +43,36 @@ def forward(self, blocks, x): return h def inference(self, g, device, batch_size, use_uva): - g.ndata['h'] = g.ndata['feat'] - sampler = MultiLayerFullNeighborSampler(1, prefetch_node_feats=['h']) + g.ndata["h"] = g.ndata["feat"] + sampler = MultiLayerFullNeighborSampler(1, prefetch_node_feats=["h"]) for l, layer in enumerate(self.layers): dataloader = DataLoader( - g, torch.arange(g.num_nodes(), device=device), sampler, device=device, - batch_size=batch_size, shuffle=False, drop_last=False, - num_workers=0, use_ddp=True, use_uva=use_uva) + g, + torch.arange(g.num_nodes(), device=device), + sampler, + device=device, + batch_size=batch_size, + shuffle=False, + drop_last=False, + num_workers=0, + use_ddp=True, + use_uva=use_uva, + ) # in order to prevent running out of GPU memory, allocate a # shared output tensor 'y' in host memory y = shared_tensor( - (g.num_nodes(), self.hid_size if l != len(self.layers) - 1 else self.out_size)) - for input_nodes, output_nodes, blocks in tqdm.tqdm(dataloader) \ - if dist.get_rank() == 0 else dataloader: - x = blocks[0].srcdata['h'] - h = layer(blocks[0], x) # len(blocks) = 1 + ( + g.num_nodes(), + self.hid_size + if l != len(self.layers) - 1 + else self.out_size, + ) + ) + for input_nodes, output_nodes, blocks in ( + tqdm.tqdm(dataloader) if dist.get_rank() == 0 else dataloader + ): + x = blocks[0].srcdata["h"] + h = layer(blocks[0], x) # len(blocks) = 1 if l != len(self.layers) - 1: h = F.relu(h) h = self.dropout(h) @@ -58,51 +80,74 @@ def inference(self, g, device, batch_size, use_uva): y[output_nodes] = h.to(y.device, non_blocking=True) # make sure all GPUs are done writing to 'y' dist.barrier() - g.ndata['h'] = y if use_uva else y.to(device) + g.ndata["h"] = y if use_uva else y.to(device) - g.ndata.pop('h') + g.ndata.pop("h") return y + def evaluate(model, g, dataloader): model.eval() ys = [] y_hats = [] for it, (input_nodes, output_nodes, blocks) in enumerate(dataloader): with torch.no_grad(): - x = blocks[0].srcdata['feat'] - ys.append(blocks[-1].dstdata['label']) + x = blocks[0].srcdata["feat"] + 
ys.append(blocks[-1].dstdata["label"]) y_hats.append(model(blocks, x)) return MF.accuracy(torch.cat(y_hats), torch.cat(ys)) -def layerwise_infer(proc_id, device, g, nid, model, use_uva, batch_size = 2**16): + +def layerwise_infer( + proc_id, device, g, nid, model, use_uva, batch_size=2**16 +): model.eval() with torch.no_grad(): pred = model.module.inference(g, device, batch_size, use_uva) pred = pred[nid] - labels = g.ndata['label'][nid].to(pred.device) + labels = g.ndata["label"][nid].to(pred.device) if proc_id == 0: acc = MF.accuracy(pred, labels) print("Test Accuracy {:.4f}".format(acc.item())) + def train(proc_id, nprocs, device, g, train_idx, val_idx, model, use_uva): - sampler = NeighborSampler([10, 10, 10], - prefetch_node_feats=['feat'], - prefetch_labels=['label']) - train_dataloader = DataLoader(g, train_idx, sampler, device=device, - batch_size=1024, shuffle=True, - drop_last=False, num_workers=0, - use_ddp=True, use_uva=use_uva) - val_dataloader = DataLoader(g, val_idx, sampler, device=device, - batch_size=1024, shuffle=True, - drop_last=False, num_workers=0, - use_ddp=True, use_uva=use_uva) + sampler = NeighborSampler( + [10, 10, 10], prefetch_node_feats=["feat"], prefetch_labels=["label"] + ) + train_dataloader = DataLoader( + g, + train_idx, + sampler, + device=device, + batch_size=1024, + shuffle=True, + drop_last=False, + num_workers=0, + use_ddp=True, + use_uva=use_uva, + ) + val_dataloader = DataLoader( + g, + val_idx, + sampler, + device=device, + batch_size=1024, + shuffle=True, + drop_last=False, + num_workers=0, + use_ddp=True, + use_uva=use_uva, + ) opt = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4) for epoch in range(10): model.train() total_loss = 0 - for it, (input_nodes, output_nodes, blocks) in enumerate(train_dataloader): - x = blocks[0].srcdata['feat'] - y = blocks[-1].dstdata['label'] + for it, (input_nodes, output_nodes, blocks) in enumerate( + train_dataloader + ): + x = blocks[0].srcdata["feat"] + y = blocks[-1].dstdata["label"] y_hat = model(blocks, x) loss = F.cross_entropy(y_hat, y) opt.zero_grad() @@ -111,54 +156,80 @@ def train(proc_id, nprocs, device, g, train_idx, val_idx, model, use_uva): total_loss += loss acc = evaluate(model, g, val_dataloader).to(device) / nprocs dist.reduce(acc, 0) - if (proc_id == 0): - print("Epoch {:05d} | Loss {:.4f} | Accuracy {:.4f} " - .format(epoch, total_loss / (it+1), acc.item())) + if proc_id == 0: + print( + "Epoch {:05d} | Loss {:.4f} | Accuracy {:.4f} ".format( + epoch, total_loss / (it + 1), acc.item() + ) + ) + def run(proc_id, nprocs, devices, g, data, mode): # find corresponding device for my rank device = devices[proc_id] torch.cuda.set_device(device) # initialize process group and unpack data for sub-processes - dist.init_process_group(backend="nccl", init_method='tcp://127.0.0.1:12345', - world_size=nprocs, rank=proc_id) + dist.init_process_group( + backend="nccl", + init_method="tcp://127.0.0.1:12345", + world_size=nprocs, + rank=proc_id, + ) out_size, train_idx, val_idx, test_idx = data train_idx = train_idx.to(device) val_idx = val_idx.to(device) - g = g.to(device if mode == 'puregpu' else 'cpu') + g = g.to(device if mode == "puregpu" else "cpu") # create GraphSAGE model (distributed) - in_size = g.ndata['feat'].shape[1] + in_size = g.ndata["feat"].shape[1] model = SAGE(in_size, 256, out_size).to(device) - model = DistributedDataParallel(model, device_ids=[device], output_device=device) + model = DistributedDataParallel( + model, device_ids=[device], output_device=device + ) # 
training + testing - use_uva = (mode == 'mixed') + use_uva = mode == "mixed" train(proc_id, nprocs, device, g, train_idx, val_idx, model, use_uva) layerwise_infer(proc_id, device, g, test_idx, model, use_uva) # cleanup process group dist.destroy_process_group() -if __name__ == '__main__': + +if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--mode", default='mixed', choices=['mixed', 'puregpu'], - help="Training mode. 'mixed' for CPU-GPU mixed training, " - "'puregpu' for pure-GPU training.") - parser.add_argument("--gpu", type=str, default='0', - help="GPU(s) in use. Can be a list of gpu ids for multi-gpu training," - " e.g., 0,1,2,3.") + parser.add_argument( + "--mode", + default="mixed", + choices=["mixed", "puregpu"], + help="Training mode. 'mixed' for CPU-GPU mixed training, " + "'puregpu' for pure-GPU training.", + ) + parser.add_argument( + "--gpu", + type=str, + default="0", + help="GPU(s) in use. Can be a list of gpu ids for multi-gpu training," + " e.g., 0,1,2,3.", + ) args = parser.parse_args() - devices = list(map(int, args.gpu.split(','))) + devices = list(map(int, args.gpu.split(","))) nprocs = len(devices) - assert torch.cuda.is_available(), f"Must have GPUs to enable multi-gpu training." - print(f'Training in {args.mode} mode using {nprocs} GPU(s)') + assert ( + torch.cuda.is_available() + ), f"Must have GPUs to enable multi-gpu training." + print(f"Training in {args.mode} mode using {nprocs} GPU(s)") # load and preprocess dataset - print('Loading data') - dataset = AsNodePredDataset(DglNodePropPredDataset('ogbn-products')) + print("Loading data") + dataset = AsNodePredDataset(DglNodePropPredDataset("ogbn-products")) g = dataset[0] # avoid creating certain graph formats in each sub-process to save momory g.create_formats_() # thread limiting to avoid resource competition - os.environ['OMP_NUM_THREADS'] = str(mp.cpu_count() // 2 // nprocs) - data = dataset.num_classes, dataset.train_idx, dataset.val_idx, dataset.test_idx + os.environ["OMP_NUM_THREADS"] = str(mp.cpu_count() // 2 // nprocs) + data = ( + dataset.num_classes, + dataset.train_idx, + dataset.val_idx, + dataset.test_idx, + ) mp.spawn(run, args=(nprocs, devices, g, data, args.mode), nprocs=nprocs) diff --git a/examples/pytorch/mvgrl/graph/dataset.py b/examples/pytorch/mvgrl/graph/dataset.py index 173e8585e8d7..b7cd08310e9a 100644 --- a/examples/pytorch/mvgrl/graph/dataset.py +++ b/examples/pytorch/mvgrl/graph/dataset.py @@ -1,68 +1,84 @@ -''' Code adapted from https://github.com/kavehhassani/mvgrl ''' +""" Code adapted from https://github.com/kavehhassani/mvgrl """ import os import re +from collections import Counter + +import networkx as nx import numpy as np -import dgl import torch as th -import networkx as nx -from dgl.data import DGLDataset -from collections import Counter from scipy.linalg import fractional_matrix_power, inv -''' Compute Personalized Page Ranking''' +import dgl +from dgl.data import DGLDataset + +""" Compute Personalized Page Ranking""" + + def compute_ppr(graph: nx.Graph, alpha=0.2, self_loop=True): a = nx.convert_matrix.to_numpy_array(graph) if self_loop: - a = a + np.eye(a.shape[0]) # A^ = A + I_n - d = np.diag(np.sum(a, 1)) # D^ = Sigma A^_ii - dinv = fractional_matrix_power(d, -0.5) # D^(-1/2) - at = np.matmul(np.matmul(dinv, a), dinv) # A~ = D^(-1/2) x A^ x D^(-1/2) - return alpha * inv((np.eye(a.shape[0]) - (1 - alpha) * at)) # a(I_n-(1-a)A~)^-1 + a = a + np.eye(a.shape[0]) # A^ = A + I_n + d = np.diag(np.sum(a, 1)) # D^ = Sigma A^_ii + dinv = 
fractional_matrix_power(d, -0.5) # D^(-1/2) + at = np.matmul(np.matmul(dinv, a), dinv) # A~ = D^(-1/2) x A^ x D^(-1/2) + return alpha * inv( + (np.eye(a.shape[0]) - (1 - alpha) * at) + ) # a(I_n-(1-a)A~)^-1 def download(dataset, datadir): os.makedirs(datadir) - url = 'https://ls11-www.cs.tu-dortmund.de/people/morris/graphkerneldatasets/{0}.zip'.format(dataset) + url = "https://ls11-www.cs.tu-dortmund.de/people/morris/graphkerneldatasets/{0}.zip".format( + dataset + ) zipfile = os.path.basename(url) - os.system('wget {0}; unzip {1}'.format(url, zipfile)) - os.system('mv {0}/* {1}'.format(dataset, datadir)) - os.system('rm -r {0}'.format(dataset)) - os.system('rm {0}'.format(zipfile)) + os.system("wget {0}; unzip {1}".format(url, zipfile)) + os.system("mv {0}/* {1}".format(dataset, datadir)) + os.system("rm -r {0}".format(dataset)) + os.system("rm {0}".format(zipfile)) + def process(dataset): - src = os.path.join(os.path.dirname(__file__), 'data') + src = os.path.join(os.path.dirname(__file__), "data") prefix = os.path.join(src, dataset, dataset) # assign each node to the corresponding graph graph_node_dict = {} - with open('{0}_graph_indicator.txt'.format(prefix), 'r') as f: + with open("{0}_graph_indicator.txt".format(prefix), "r") as f: for idx, line in enumerate(f): - graph_node_dict[idx + 1] = int(line.strip('\n')) + graph_node_dict[idx + 1] = int(line.strip("\n")) node_labels = [] - if os.path.exists('{0}_node_labels.txt'.format(prefix)): - with open('{0}_node_labels.txt'.format(prefix), 'r') as f: + if os.path.exists("{0}_node_labels.txt".format(prefix)): + with open("{0}_node_labels.txt".format(prefix), "r") as f: for line in f: - node_labels += [int(line.strip('\n')) - 1] + node_labels += [int(line.strip("\n")) - 1] num_unique_node_labels = max(node_labels) + 1 else: - print('No node labels') + print("No node labels") node_attrs = [] - if os.path.exists('{0}_node_attributes.txt'.format(prefix)): - with open('{0}_node_attributes.txt'.format(prefix), 'r') as f: + if os.path.exists("{0}_node_attributes.txt".format(prefix)): + with open("{0}_node_attributes.txt".format(prefix), "r") as f: for line in f: node_attrs.append( - np.array([float(attr) for attr in re.split("[,\s]+", line.strip("\s\n")) if attr], dtype=np.float) + np.array( + [ + float(attr) + for attr in re.split("[,\s]+", line.strip("\s\n")) + if attr + ], + dtype=np.float, + ) ) else: - print('No node attributes') + print("No node attributes") graph_labels = [] unique_labels = set() - with open('{0}_graph_labels.txt'.format(prefix), 'r') as f: + with open("{0}_graph_labels.txt".format(prefix), "r") as f: for line in f: - val = int(line.strip('\n')) + val = int(line.strip("\n")) if val not in unique_labels: unique_labels.add(val) graph_labels.append(val) @@ -71,9 +87,9 @@ def process(dataset): adj_list = {idx: [] for idx in range(1, len(graph_labels) + 1)} index_graph = {idx: [] for idx in range(1, len(graph_labels) + 1)} - with open('{0}_A.txt'.format(prefix), 'r') as f: + with open("{0}_A.txt".format(prefix), "r") as f: for line in f: - u, v = tuple(map(int, line.strip('\n').split(','))) + u, v = tuple(map(int, line.strip("\n").split(","))) adj_list[graph_node_dict[u]].append((u, v)) index_graph[graph_node_dict[u]] += [u, v] @@ -84,17 +100,17 @@ def process(dataset): for idx in range(1, 1 + len(adj_list)): graph = nx.from_edgelist(adj_list[idx]) - graph.graph['label'] = graph_labels[idx - 1] + graph.graph["label"] = graph_labels[idx - 1] for u in graph.nodes(): if len(node_labels) > 0: node_label_one_hot = [0] * 
num_unique_node_labels node_label = node_labels[u - 1] node_label_one_hot[node_label] = 1 - graph.nodes[u]['label'] = node_label_one_hot + graph.nodes[u]["label"] = node_label_one_hot if len(node_attrs) > 0: - graph.nodes[u]['feat'] = node_attrs[u - 1] + graph.nodes[u]["feat"] = node_attrs[u - 1] if len(node_attrs) > 0: - graph.graph['feat_dim'] = node_attrs[0].shape[0] + graph.graph["feat_dim"] = node_attrs[0].shape[0] # relabeling mapping = {} @@ -104,7 +120,7 @@ def process(dataset): graphs.append(nx.relabel_nodes(graph, mapping)) pprs.append(compute_ppr(graph, alpha=0.2)) - if 'feat_dim' in graphs[0].graph: + if "feat_dim" in graphs[0].graph: pass else: max_deg = max([max(dict(graph.degree).values()) for graph in graphs]) @@ -112,15 +128,18 @@ def process(dataset): for u in graph.nodes(data=True): f = np.zeros(max_deg + 1) f[graph.degree[u[0]]] = 1.0 - if 'label' in u[1]: - f = np.concatenate((np.array(u[1]['label'], dtype=np.float), f)) - graph.nodes[u[0]]['feat'] = f + if "label" in u[1]: + f = np.concatenate( + (np.array(u[1]["label"], dtype=np.float), f) + ) + graph.nodes[u[0]]["feat"] = f return graphs, pprs + def load(dataset): basedir = os.path.dirname(os.path.abspath(__file__)) - datadir = os.path.join(basedir, 'data', dataset) + datadir = os.path.join(basedir, "data", dataset) if not os.path.exists(datadir): download(dataset, datadir) @@ -129,20 +148,27 @@ def load(dataset): for idx, graph in enumerate(graphs): adj.append(nx.to_numpy_array(graph)) - labels.append(graph.graph['label']) - feat.append(np.array(list(nx.get_node_attributes(graph, 'feat').values()))) - - adj, diff, feat, labels = np.array(adj), np.array(diff), np.array(feat), np.array(labels) - - np.save(f'{datadir}/adj.npy', adj) - np.save(f'{datadir}/diff.npy', diff) - np.save(f'{datadir}/feat.npy', feat) - np.save(f'{datadir}/labels.npy', labels) + labels.append(graph.graph["label"]) + feat.append( + np.array(list(nx.get_node_attributes(graph, "feat").values())) + ) + + adj, diff, feat, labels = ( + np.array(adj), + np.array(diff), + np.array(feat), + np.array(labels), + ) + + np.save(f"{datadir}/adj.npy", adj) + np.save(f"{datadir}/diff.npy", diff) + np.save(f"{datadir}/feat.npy", feat) + np.save(f"{datadir}/labels.npy", labels) else: - adj = np.load(f'{datadir}/adj.npy', allow_pickle=True) - diff = np.load(f'{datadir}/diff.npy', allow_pickle=True) - feat = np.load(f'{datadir}/feat.npy', allow_pickle=True) - labels = np.load(f'{datadir}/labels.npy', allow_pickle=True) + adj = np.load(f"{datadir}/adj.npy", allow_pickle=True) + diff = np.load(f"{datadir}/diff.npy", allow_pickle=True) + feat = np.load(f"{datadir}/feat.npy", allow_pickle=True) + labels = np.load(f"{datadir}/labels.npy", allow_pickle=True) n_graphs = adj.shape[0] @@ -156,14 +182,14 @@ def load(dataset): graph = dgl.graph(edge_indexes) graph = graph.add_self_loop() - graph.ndata['feat'] = th.tensor(feat[i]).float() + graph.ndata["feat"] = th.tensor(feat[i]).float() diff_adj = diff[i] diff_indexes = diff_adj.nonzero() diff_weight = th.tensor(diff_adj[diff_indexes]).float() diff_graph = dgl.graph(diff_indexes) - diff_graph.edata['edge_weight'] = diff_weight + diff_graph.edata["edge_weight"] = diff_weight label = labels[i] graphs.append(graph) diff_graphs.append(diff_graph) @@ -174,9 +200,10 @@ def load(dataset): dataset = TUDataset(graphs, diff_graphs, labels) return dataset + class TUDataset(DGLDataset): def __init__(self, graphs, diff_graphs, labels): - super(TUDataset, self).__init__(name='tu') + super(TUDataset, self).__init__(name="tu") self.graphs 
= graphs self.diff_graphs = diff_graphs self.labels = labels @@ -188,4 +215,4 @@ def __len__(self): return len(self.graphs) def __getitem__(self, idx): - return self.graphs[idx], self.diff_graphs[idx], self.labels[idx] \ No newline at end of file + return self.graphs[idx], self.diff_graphs[idx], self.labels[idx] diff --git a/examples/pytorch/mvgrl/graph/main.py b/examples/pytorch/mvgrl/graph/main.py index dd913f9eb877..529334710010 100644 --- a/examples/pytorch/mvgrl/graph/main.py +++ b/examples/pytorch/mvgrl/graph/main.py @@ -1,39 +1,54 @@ import argparse +import warnings + import torch as th +from dataset import load import dgl from dgl.dataloading import GraphDataLoader -import warnings -from dataset import load -warnings.filterwarnings('ignore') +warnings.filterwarnings("ignore") -from utils import linearsvc from model import MVGRL +from utils import linearsvc -parser = argparse.ArgumentParser(description='mvgrl') - -parser.add_argument('--dataname', type=str, default='MUTAG', help='Name of dataset.') -parser.add_argument('--gpu', type=int, default=-1, help='GPU index. Default: -1, using cpu.') -parser.add_argument('--epochs', type=int, default=200, help=' Number of training periods.') -parser.add_argument('--patience', type=int, default=20, help='Early stopping steps.') -parser.add_argument('--lr', type=float, default=0.001, help='Learning rate of mvgrl.') -parser.add_argument('--wd', type=float, default=0., help='Weight decay of mvgrl.') -parser.add_argument('--batch_size', type=int, default=64, help='Batch size.') -parser.add_argument('--n_layers', type=int, default=4, help='Number of GNN layers.') -parser.add_argument("--hid_dim", type=int, default=32, help='Hidden layer dim.') +parser = argparse.ArgumentParser(description="mvgrl") + +parser.add_argument( + "--dataname", type=str, default="MUTAG", help="Name of dataset." +) +parser.add_argument( + "--gpu", type=int, default=-1, help="GPU index. Default: -1, using cpu." +) +parser.add_argument( + "--epochs", type=int, default=200, help=" Number of training periods." +) +parser.add_argument( + "--patience", type=int, default=20, help="Early stopping steps." +) +parser.add_argument( + "--lr", type=float, default=0.001, help="Learning rate of mvgrl." +) +parser.add_argument( + "--wd", type=float, default=0.0, help="Weight decay of mvgrl." +) +parser.add_argument("--batch_size", type=int, default=64, help="Batch size.") +parser.add_argument( + "--n_layers", type=int, default=4, help="Number of GNN layers." 
+) +parser.add_argument("--hid_dim", type=int, default=32, help="Hidden layer dim.") args = parser.parse_args() # check cuda if args.gpu != -1 and th.cuda.is_available(): - args.device = 'cuda:{}'.format(args.gpu) + args.device = "cuda:{}".format(args.gpu) else: - args.device = 'cpu' + args.device = "cpu" def collate(samples): - ''' collate function for building the graph dataloader''' + """collate function for building the graph dataloader""" graphs, diff_graphs, labels = map(list, zip(*samples)) # generate batched graphs and labels @@ -45,30 +60,33 @@ def collate(samples): graph_id = th.arange(n_graphs) graph_id = dgl.broadcast_nodes(batched_graph, graph_id) - batched_graph.ndata['graph_id'] = graph_id + batched_graph.ndata["graph_id"] = graph_id return batched_graph, batched_diff_graph, batched_labels -if __name__ == '__main__': + +if __name__ == "__main__": # Step 1: Prepare data =================================================================== # dataset = load(args.dataname) graphs, diff_graphs, labels = map(list, zip(*dataset)) - print('Number of graphs:', len(graphs)) + print("Number of graphs:", len(graphs)) # generate a full-graph with all examples for evaluation wholegraph = dgl.batch(graphs) whole_dg = dgl.batch(diff_graphs) # create dataloader for batch training - dataloader = GraphDataLoader(dataset, - batch_size=args.batch_size, - collate_fn=collate, - drop_last=False, - shuffle=True) + dataloader = GraphDataLoader( + dataset, + batch_size=args.batch_size, + collate_fn=collate, + drop_last=False, + shuffle=True, + ) - in_dim = wholegraph.ndata['feat'].shape[1] + in_dim = wholegraph.ndata["feat"].shape[1] # Step 2: Create model =================================================================== # model = MVGRL(in_dim, args.hid_dim, args.n_layers) @@ -77,19 +95,19 @@ def collate(samples): # Step 3: Create training components ===================================================== # optimizer = th.optim.Adam(model.parameters(), lr=args.lr) - print('===== Before training ======') + print("===== Before training ======") wholegraph = wholegraph.to(args.device) whole_dg = whole_dg.to(args.device) - wholefeat = wholegraph.ndata.pop('feat') - whole_weight = whole_dg.edata.pop('edge_weight') + wholefeat = wholegraph.ndata.pop("feat") + whole_weight = whole_dg.edata.pop("edge_weight") embs = model.get_embedding(wholegraph, whole_dg, wholefeat, whole_weight) lbls = th.LongTensor(labels) acc_mean, acc_std = linearsvc(embs, lbls) - print('accuracy_mean, {:.4f}'.format(acc_mean)) + print("accuracy_mean, {:.4f}".format(acc_mean)) - best = float('inf') + best = float("inf") cnt_wait = 0 # Step 4: Training epochs =============================================================== # for epoch in range(args.epochs): @@ -100,9 +118,9 @@ def collate(samples): graph = graph.to(args.device) diff_graph = diff_graph.to(args.device) - feat = graph.ndata['feat'] - graph_id = graph.ndata['graph_id'] - edge_weight = diff_graph.edata['edge_weight'] + feat = graph.ndata["feat"] + graph_id = graph.ndata["graph_id"] + edge_weight = diff_graph.edata["edge_weight"] n_graph = label.shape[0] optimizer.zero_grad() @@ -111,25 +129,25 @@ def collate(samples): loss.backward() optimizer.step() - print('Epoch {}, Loss {:.4f}'.format(epoch, loss_all)) + print("Epoch {}, Loss {:.4f}".format(epoch, loss_all)) if loss < best: best = loss best_t = epoch cnt_wait = 0 - th.save(model.state_dict(), f'{args.dataname}.pkl') + th.save(model.state_dict(), f"{args.dataname}.pkl") else: cnt_wait += 1 if cnt_wait == args.patience: - 
print('Early stopping') + print("Early stopping") break - print('Training End') + print("Training End") # Step 5: Linear evaluation ========================================================== # - model.load_state_dict(th.load(f'{args.dataname}.pkl')) + model.load_state_dict(th.load(f"{args.dataname}.pkl")) embs = model.get_embedding(wholegraph, whole_dg, wholefeat, whole_weight) acc_mean, acc_std = linearsvc(embs, lbls) - print('accuracy_mean, {:.4f}'.format(acc_mean)) \ No newline at end of file + print("accuracy_mean, {:.4f}".format(acc_mean)) diff --git a/examples/pytorch/mvgrl/graph/model.py b/examples/pytorch/mvgrl/graph/model.py index 7950b4bd2297..b9281721af4d 100644 --- a/examples/pytorch/mvgrl/graph/model.py +++ b/examples/pytorch/mvgrl/graph/model.py @@ -1,10 +1,10 @@ import torch as th import torch.nn as nn +from utils import local_global_loss_ from dgl.nn.pytorch import GraphConv from dgl.nn.pytorch.glob import SumPooling -from utils import local_global_loss_ class MLP(nn.Module): def __init__(self, in_dim, out_dim): @@ -15,7 +15,7 @@ def __init__(self, in_dim, out_dim): nn.Linear(out_dim, out_dim), nn.PReLU(), nn.Linear(out_dim, out_dim), - nn.PReLU() + nn.PReLU(), ) self.linear_shortcut = nn.Linear(in_dim, out_dim) @@ -30,13 +30,25 @@ def __init__(self, in_dim, out_dim, num_layers, norm): self.num_layers = num_layers self.layers = nn.ModuleList() - self.layers.append(GraphConv(in_dim, out_dim, bias=False, norm=norm, activation = nn.PReLU())) + self.layers.append( + GraphConv( + in_dim, out_dim, bias=False, norm=norm, activation=nn.PReLU() + ) + ) self.pooling = SumPooling() for _ in range(num_layers - 1): - self.layers.append(GraphConv(out_dim, out_dim, bias=False, norm=norm, activation = nn.PReLU())) - - def forward(self, graph, feat, edge_weight = None): + self.layers.append( + GraphConv( + out_dim, + out_dim, + bias=False, + norm=norm, + activation=nn.PReLU(), + ) + ) + + def forward(self, graph, feat, edge_weight=None): h = self.layers[0](graph, feat, edge_weight=edge_weight) hg = self.pooling(graph, h) @@ -70,17 +82,19 @@ class MVGRL(nn.Module): edge_weight: tensor Edge weight of the diffusion graph """ + def __init__(self, in_dim, out_dim, num_layers): super(MVGRL, self).__init__() self.local_mlp = MLP(out_dim, out_dim) self.global_mlp = MLP(num_layers * out_dim, out_dim) - self.encoder1 = GCN(in_dim, out_dim, num_layers, norm='both') - self.encoder2 = GCN(in_dim, out_dim, num_layers, norm='none') - + self.encoder1 = GCN(in_dim, out_dim, num_layers, norm="both") + self.encoder2 = GCN(in_dim, out_dim, num_layers, norm="none") def get_embedding(self, graph1, graph2, feat, edge_weight): local_v1, global_v1 = self.encoder1(graph1, feat) - local_v2, global_v2 = self.encoder2(graph2, feat, edge_weight=edge_weight) + local_v2, global_v2 = self.encoder2( + graph2, feat, edge_weight=edge_weight + ) global_v1 = self.global_mlp(global_v1) global_v2 = self.global_mlp(global_v2) @@ -90,7 +104,9 @@ def get_embedding(self, graph1, graph2, feat, edge_weight): def forward(self, graph1, graph2, feat, edge_weight, graph_id): # calculate node embeddings and graph embeddings local_v1, global_v1 = self.encoder1(graph1, feat) - local_v2, global_v2 = self.encoder2(graph2, feat, edge_weight=edge_weight) + local_v2, global_v2 = self.encoder2( + graph2, feat, edge_weight=edge_weight + ) local_v1 = self.local_mlp(local_v1) local_v2 = self.local_mlp(local_v2) @@ -105,8 +121,3 @@ def forward(self, graph1, graph2, feat, edge_weight, graph_id): loss = loss1 + loss2 return loss - - - - - diff --git 
a/examples/pytorch/mvgrl/graph/utils.py b/examples/pytorch/mvgrl/graph/utils.py index e4fee4436c87..550c64f5e3d0 100644 --- a/examples/pytorch/mvgrl/graph/utils.py +++ b/examples/pytorch/mvgrl/graph/utils.py @@ -1,28 +1,31 @@ -''' Code adapted from https://github.com/fanyun-sun/InfoGraph ''' -import torch as th -import torch.nn.functional as F - +""" Code adapted from https://github.com/fanyun-sun/InfoGraph """ import math -import numpy as np -from sklearn.svm import LinearSVC +import numpy as np +import torch as th +import torch.nn.functional as F from sklearn.metrics import accuracy_score from sklearn.model_selection import GridSearchCV, StratifiedKFold +from sklearn.svm import LinearSVC + def linearsvc(embeds, labels): x = embeds.cpu().numpy() y = labels.cpu().numpy() - params = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]} + params = {"C": [0.001, 0.01, 0.1, 1, 10, 100, 1000]} kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=None) accuracies = [] for train_index, test_index in kf.split(x, y): x_train, x_test = x[train_index], x[test_index] y_train, y_test = y[train_index], y[test_index] - classifier = GridSearchCV(LinearSVC(), params, cv=5, scoring='accuracy', verbose=0) + classifier = GridSearchCV( + LinearSVC(), params, cv=5, scoring="accuracy", verbose=0 + ) classifier.fit(x_train, y_train) accuracies.append(accuracy_score(y_test, classifier.predict(x_test))) return np.mean(accuracies), np.std(accuracies) + def get_positive_expectation(p_samples, average=True): """Computes the positive part of a JS Divergence. Args: @@ -31,8 +34,8 @@ def get_positive_expectation(p_samples, average=True): Returns: th.Tensor """ - log_2 = math.log(2.) - Ep = log_2 - F.softplus(- p_samples) + log_2 = math.log(2.0) + Ep = log_2 - F.softplus(-p_samples) if average: return Ep.mean() @@ -48,7 +51,7 @@ def get_negative_expectation(q_samples, average=True): Returns: th.Tensor """ - log_2 = math.log(2.) + log_2 = math.log(2.0) Eq = F.softplus(-q_samples) + q_samples - log_2 if average: @@ -69,8 +72,8 @@ def local_global_loss_(l_enc, g_enc, graph_id): for nodeidx, graphidx in enumerate(graph_id): - pos_mask[nodeidx][graphidx] = 1. - neg_mask[nodeidx][graphidx] = 0. + pos_mask[nodeidx][graphidx] = 1.0 + neg_mask[nodeidx][graphidx] = 0.0 res = th.mm(l_enc, g_enc.t()) diff --git a/examples/pytorch/mvgrl/node/dataset.py b/examples/pytorch/mvgrl/node/dataset.py index f11e6665f71c..91f0d7b6054e 100644 --- a/examples/pytorch/mvgrl/node/dataset.py +++ b/examples/pytorch/mvgrl/node/dataset.py @@ -1,22 +1,21 @@ -''' Code adapted from https://github.com/kavehhassani/mvgrl ''' +""" Code adapted from https://github.com/kavehhassani/mvgrl """ +import networkx as nx import numpy as np -import torch as th import scipy.sparse as sp +import torch as th from scipy.linalg import fractional_matrix_power, inv - -import dgl -from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset -import networkx as nx - from sklearn.preprocessing import MinMaxScaler +import dgl +from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset from dgl.nn import APPNPConv + def preprocess_features(features): """Row-normalize feature matrix and convert to tuple representation""" rowsum = np.array(features.sum(1)) r_inv = np.power(rowsum, -1).flatten() - r_inv[np.isinf(r_inv)] = 0. 
+ r_inv[np.isinf(r_inv)] = 0.0 r_mat_inv = sp.diags(r_inv) features = r_mat_inv.dot(features) if isinstance(features, np.ndarray): @@ -52,22 +51,24 @@ def compute_ppr(graph: nx.Graph, alpha=0.2, self_loop=True): d = np.diag(np.sum(a, 1)) # D^ = Sigma A^_ii dinv = fractional_matrix_power(d, -0.5) # D^(-1/2) at = np.matmul(np.matmul(dinv, a), dinv) # A~ = D^(-1/2) x A^ x D^(-1/2) - return alpha * inv((np.eye(a.shape[0]) - (1 - alpha) * at)) # a(I_n-(1-a)A~)^-1 + return alpha * inv( + (np.eye(a.shape[0]) - (1 - alpha) * at) + ) # a(I_n-(1-a)A~)^-1 def process_dataset(name, epsilon): - if name == 'cora': + if name == "cora": dataset = CoraGraphDataset() - elif name == 'citeseer': + elif name == "citeseer": dataset = CiteseerGraphDataset() graph = dataset[0] - feat = graph.ndata.pop('feat') - label = graph.ndata.pop('label') + feat = graph.ndata.pop("feat") + label = graph.ndata.pop("label") - train_mask = graph.ndata.pop('train_mask') - val_mask = graph.ndata.pop('val_mask') - test_mask = graph.ndata.pop('test_mask') + train_mask = graph.ndata.pop("train_mask") + val_mask = graph.ndata.pop("val_mask") + test_mask = graph.ndata.pop("test_mask") train_idx = th.nonzero(train_mask, as_tuple=False).squeeze() val_idx = th.nonzero(val_mask, as_tuple=False).squeeze() @@ -75,12 +76,12 @@ def process_dataset(name, epsilon): nx_g = dgl.to_networkx(graph) - print('computing ppr') + print("computing ppr") diff_adj = compute_ppr(nx_g, 0.2) - print('computing end') + print("computing end") - if name == 'citeseer': - print('additional processing') + if name == "citeseer": + print("additional processing") feat = th.tensor(preprocess_features(feat.numpy())).float() diff_adj[diff_adj < epsilon] = 0 scaler = MinMaxScaler() @@ -93,19 +94,29 @@ def process_dataset(name, epsilon): graph = graph.add_self_loop() - return graph, diff_graph, feat, label, train_idx, val_idx, test_idx, diff_weight + return ( + graph, + diff_graph, + feat, + label, + train_idx, + val_idx, + test_idx, + diff_weight, + ) + def process_dataset_appnp(epsilon): k = 20 alpha = 0.2 dataset = PubmedGraphDataset() graph = dataset[0] - feat = graph.ndata.pop('feat') - label = graph.ndata.pop('label') + feat = graph.ndata.pop("feat") + label = graph.ndata.pop("label") - train_mask = graph.ndata.pop('train_mask') - val_mask = graph.ndata.pop('val_mask') - test_mask = graph.ndata.pop('test_mask') + train_mask = graph.ndata.pop("train_mask") + val_mask = graph.ndata.pop("val_mask") + test_mask = graph.ndata.pop("test_mask") train_idx = th.nonzero(train_mask, as_tuple=False).squeeze() val_idx = th.nonzero(val_mask, as_tuple=False).squeeze() @@ -123,4 +134,13 @@ def process_dataset_appnp(epsilon): diff_weight = diff_adj[diff_edges] diff_graph = dgl.graph(diff_edges) - return graph, diff_graph, feat, label, train_idx, val_idx, test_idx, diff_weight \ No newline at end of file + return ( + graph, + diff_graph, + feat, + label, + train_idx, + val_idx, + test_idx, + diff_weight, + ) diff --git a/examples/pytorch/mvgrl/node/main.py b/examples/pytorch/mvgrl/node/main.py index 6d68cddf19d6..918cba5b75ff 100644 --- a/examples/pytorch/mvgrl/node/main.py +++ b/examples/pytorch/mvgrl/node/main.py @@ -1,41 +1,74 @@ import argparse +import warnings + import numpy as np import torch as th import torch.nn as nn -import warnings - -warnings.filterwarnings('ignore') +warnings.filterwarnings("ignore") from dataset import process_dataset from model import MVGRL, LogReg -parser = argparse.ArgumentParser(description='mvgrl') - -parser.add_argument('--dataname', type=str, 
default='cora', help='Name of dataset.') -parser.add_argument('--gpu', type=int, default=0, help='GPU index. Default: -1, using cpu.') -parser.add_argument('--epochs', type=int, default=500, help='Training epochs.') -parser.add_argument('--patience', type=int, default=20, help='Patient epochs to wait before early stopping.') -parser.add_argument('--lr1', type=float, default=0.001, help='Learning rate of mvgrl.') -parser.add_argument('--lr2', type=float, default=0.01, help='Learning rate of linear evaluator.') -parser.add_argument('--wd1', type=float, default=0., help='Weight decay of mvgrl.') -parser.add_argument('--wd2', type=float, default=0., help='Weight decay of linear evaluator.') -parser.add_argument('--epsilon', type=float, default=0.01, help='Edge mask threshold of diffusion graph.') -parser.add_argument("--hid_dim", type=int, default=512, help='Hidden layer dim.') +parser = argparse.ArgumentParser(description="mvgrl") + +parser.add_argument( + "--dataname", type=str, default="cora", help="Name of dataset." +) +parser.add_argument( + "--gpu", type=int, default=0, help="GPU index. Default: -1, using cpu." +) +parser.add_argument("--epochs", type=int, default=500, help="Training epochs.") +parser.add_argument( + "--patience", + type=int, + default=20, + help="Patient epochs to wait before early stopping.", +) +parser.add_argument( + "--lr1", type=float, default=0.001, help="Learning rate of mvgrl." +) +parser.add_argument( + "--lr2", type=float, default=0.01, help="Learning rate of linear evaluator." +) +parser.add_argument( + "--wd1", type=float, default=0.0, help="Weight decay of mvgrl." +) +parser.add_argument( + "--wd2", type=float, default=0.0, help="Weight decay of linear evaluator." +) +parser.add_argument( + "--epsilon", + type=float, + default=0.01, + help="Edge mask threshold of diffusion graph.", +) +parser.add_argument( + "--hid_dim", type=int, default=512, help="Hidden layer dim." 
+) args = parser.parse_args() # check cuda if args.gpu != -1 and th.cuda.is_available(): - args.device = 'cuda:{}'.format(args.gpu) + args.device = "cuda:{}".format(args.gpu) else: - args.device = 'cpu' + args.device = "cpu" -if __name__ == '__main__': +if __name__ == "__main__": print(args) # Step 1: Prepare data =================================================================== # - graph, diff_graph, feat, label, train_idx, val_idx, test_idx, edge_weight = process_dataset(args.dataname, args.epsilon) + ( + graph, + diff_graph, + feat, + label, + train_idx, + val_idx, + test_idx, + edge_weight, + ) = process_dataset(args.dataname, args.epsilon) n_feat = feat.shape[1] n_classes = np.unique(label).shape[0] @@ -60,11 +93,13 @@ lbl = lbl.to(args.device) # Step 3: Create training components ===================================================== # - optimizer = th.optim.Adam(model.parameters(), lr=args.lr1, weight_decay=args.wd1) + optimizer = th.optim.Adam( + model.parameters(), lr=args.lr1, weight_decay=args.wd1 + ) loss_fn = nn.BCEWithLogitsLoss() # Step 4: Training epochs ================================================================ # - best = float('inf') + best = float("inf") cnt_wait = 0 for epoch in range(args.epochs): model.train() @@ -80,20 +115,20 @@ loss.backward() optimizer.step() - print('Epoch: {0}, Loss: {1:0.4f}'.format(epoch, loss.item())) + print("Epoch: {0}, Loss: {1:0.4f}".format(epoch, loss.item())) if loss < best: best = loss cnt_wait = 0 - th.save(model.state_dict(), 'model.pkl') + th.save(model.state_dict(), "model.pkl") else: cnt_wait += 1 if cnt_wait == args.patience: - print('Early stopping') + print("Early stopping") break - model.load_state_dict(th.load('model.pkl')) + model.load_state_dict(th.load("model.pkl")) embeds = model.get_embedding(graph, diff_graph, feat, edge_weight) train_embs = embeds[train_idx] @@ -107,7 +142,9 @@ # Step 5: Linear evaluation ========================================================== # for _ in range(5): model = LogReg(args.hid_dim, n_classes) - opt = th.optim.Adam(model.parameters(), lr=args.lr2, weight_decay=args.wd2) + opt = th.optim.Adam( + model.parameters(), lr=args.lr2, weight_decay=args.wd2 + ) model = model.to(args.device) loss_fn = nn.CrossEntropyLoss() diff --git a/examples/pytorch/mvgrl/node/main_sample.py b/examples/pytorch/mvgrl/node/main_sample.py index 8e163963cd39..8a8ef725c286 100644 --- a/examples/pytorch/mvgrl/node/main_sample.py +++ b/examples/pytorch/mvgrl/node/main_sample.py @@ -1,49 +1,95 @@ import argparse +import random +import warnings + import numpy as np import torch as th import torch.nn as nn -import random + import dgl -import warnings -warnings.filterwarnings('ignore') +warnings.filterwarnings("ignore") from dataset import process_dataset, process_dataset_appnp from model import MVGRL, LogReg -parser = argparse.ArgumentParser(description='mvgrl') - -parser.add_argument('--dataname', type=str, default='cora', help='Name of dataset.') -parser.add_argument('--gpu', type=int, default=-1, help='GPU index. 
Default: -1, using cpu.') -parser.add_argument('--epochs', type=int, default=500, help='Training epochs.') -parser.add_argument('--patience', type=int, default=20, help='Patient epochs to wait before early stopping.') -parser.add_argument('--lr1', type=float, default=0.001, help='Learning rate of mvgrl.') -parser.add_argument('--lr2', type=float, default=0.01, help='Learning rate of linear evaluator.') -parser.add_argument('--wd1', type=float, default=0., help='Weight decay of mvgrl.') -parser.add_argument('--wd2', type=float, default=0., help='Weight decay of linear evaluator.') -parser.add_argument('--epsilon', type=float, default=0.01, help='Edge mask threshold of diffusion graph.') -parser.add_argument("--hid_dim", type=int, default=512, help='Hidden layer dim.') -parser.add_argument("--sample_size", type=int, default=2000, help='Subgraph size.') +parser = argparse.ArgumentParser(description="mvgrl") + +parser.add_argument( + "--dataname", type=str, default="cora", help="Name of dataset." +) +parser.add_argument( + "--gpu", type=int, default=-1, help="GPU index. Default: -1, using cpu." +) +parser.add_argument("--epochs", type=int, default=500, help="Training epochs.") +parser.add_argument( + "--patience", + type=int, + default=20, + help="Patient epochs to wait before early stopping.", +) +parser.add_argument( + "--lr1", type=float, default=0.001, help="Learning rate of mvgrl." +) +parser.add_argument( + "--lr2", type=float, default=0.01, help="Learning rate of linear evaluator." +) +parser.add_argument( + "--wd1", type=float, default=0.0, help="Weight decay of mvgrl." +) +parser.add_argument( + "--wd2", type=float, default=0.0, help="Weight decay of linear evaluator." +) +parser.add_argument( + "--epsilon", + type=float, + default=0.01, + help="Edge mask threshold of diffusion graph.", +) +parser.add_argument( + "--hid_dim", type=int, default=512, help="Hidden layer dim." +) +parser.add_argument( + "--sample_size", type=int, default=2000, help="Subgraph size." 
+) args = parser.parse_args() # check cuda if args.gpu != -1 and th.cuda.is_available(): - args.device = 'cuda:{}'.format(args.gpu) + args.device = "cuda:{}".format(args.gpu) else: - args.device = 'cpu' + args.device = "cpu" -if __name__ == '__main__': +if __name__ == "__main__": print(args) # Step 1: Prepare data =================================================================== # - if args.dataname == 'pubmed': - graph, diff_graph, feat, label, train_idx, val_idx, test_idx, edge_weight = process_dataset_appnp(args.epsilon) + if args.dataname == "pubmed": + ( + graph, + diff_graph, + feat, + label, + train_idx, + val_idx, + test_idx, + edge_weight, + ) = process_dataset_appnp(args.epsilon) else: - graph, diff_graph, feat, label, train_idx, val_idx, test_idx, edge_weight = process_dataset(args.dataname, args.epsilon) + ( + graph, + diff_graph, + feat, + label, + train_idx, + val_idx, + test_idx, + edge_weight, + ) = process_dataset(args.dataname, args.epsilon) edge_weight = th.tensor(edge_weight).float() - graph.ndata['feat'] = feat - diff_graph.edata['edge_weight'] = edge_weight + graph.ndata["feat"] = feat + diff_graph.edata["edge_weight"] = edge_weight n_feat = feat.shape[1] n_classes = np.unique(label).shape[0] @@ -67,13 +113,15 @@ model = model.to(args.device) # Step 3: Create training components ===================================================== # - optimizer = th.optim.Adam(model.parameters(), lr=args.lr1, weight_decay=args.wd1) + optimizer = th.optim.Adam( + model.parameters(), lr=args.lr1, weight_decay=args.wd1 + ) loss_fn = nn.BCEWithLogitsLoss() node_list = list(range(n_node)) # Step 4: Training epochs ================================================================ # - best = float('inf') + best = float("inf") cnt_wait = 0 for epoch in range(args.epochs): model.train() @@ -84,8 +132,8 @@ g = dgl.node_subgraph(graph, sample_idx) dg = dgl.node_subgraph(diff_graph, sample_idx) - f = g.ndata.pop('feat') - ew = dg.edata.pop('edge_weight') + f = g.ndata.pop("feat") + ew = dg.edata.pop("edge_weight") shuf_idx = np.random.permutation(sample_size) sf = f[shuf_idx, :] @@ -103,20 +151,20 @@ loss.backward() optimizer.step() - print('Epoch: {0}, Loss: {1:0.4f}'.format(epoch, loss.item())) + print("Epoch: {0}, Loss: {1:0.4f}".format(epoch, loss.item())) if loss < best: best = loss cnt_wait = 0 - th.save(model.state_dict(), 'model.pkl') + th.save(model.state_dict(), "model.pkl") else: cnt_wait += 1 if cnt_wait == args.patience: - print('Early stopping') + print("Early stopping") break - model.load_state_dict(th.load('model.pkl')) + model.load_state_dict(th.load("model.pkl")) graph = graph.to(args.device) diff_graph = diff_graph.to(args.device) @@ -135,7 +183,9 @@ # Step 5: Linear evaluation ========================================================== # for _ in range(5): model = LogReg(args.hid_dim, n_classes) - opt = th.optim.Adam(model.parameters(), lr=args.lr2, weight_decay=args.wd2) + opt = th.optim.Adam( + model.parameters(), lr=args.lr2, weight_decay=args.wd2 + ) model = model.to(args.device) loss_fn = nn.CrossEntropyLoss() diff --git a/examples/pytorch/mvgrl/node/model.py b/examples/pytorch/mvgrl/node/model.py index 1cf0608ef043..643acaf5b045 100644 --- a/examples/pytorch/mvgrl/node/model.py +++ b/examples/pytorch/mvgrl/node/model.py @@ -4,6 +4,7 @@ from dgl.nn.pytorch import GraphConv from dgl.nn.pytorch.glob import AvgPooling + class LogReg(nn.Module): def __init__(self, hid_dim, n_classes): super(LogReg, self).__init__() @@ -36,13 +37,17 @@ def forward(self, h1, h2, h3, h4, c1, 
c2): return logits -class MVGRL(nn.Module): +class MVGRL(nn.Module): def __init__(self, in_dim, out_dim): super(MVGRL, self).__init__() - self.encoder1 = GraphConv(in_dim, out_dim, norm='both', bias=True, activation=nn.PReLU()) - self.encoder2 = GraphConv(in_dim, out_dim, norm='none', bias=True, activation=nn.PReLU()) + self.encoder1 = GraphConv( + in_dim, out_dim, norm="both", bias=True, activation=nn.PReLU() + ) + self.encoder2 = GraphConv( + in_dim, out_dim, norm="none", bias=True, activation=nn.PReLU() + ) self.pooling = AvgPooling() self.disc = Discriminator(out_dim) @@ -66,4 +71,4 @@ def forward(self, graph, diff_graph, feat, shuf_feat, edge_weight): out = self.disc(h1, h2, h3, h4, c1, c2) - return out \ No newline at end of file + return out diff --git a/examples/pytorch/node2vec/main.py b/examples/pytorch/node2vec/main.py index a1a228f6ddf4..914cc8eaff55 100644 --- a/examples/pytorch/node2vec/main.py +++ b/examples/pytorch/node2vec/main.py @@ -1,8 +1,10 @@ import time -from dgl.sampling import node2vec_random_walk + from model import Node2vecModel from utils import load_graph, parse_arguments +from dgl.sampling import node2vec_random_walk + def time_randomwalk(graph, args): """ @@ -12,44 +14,50 @@ def time_randomwalk(graph, args): start_time = time.time() # default setting for testing - params = {'p': 0.25, - 'q': 4, - 'walk_length': 50} + params = {"p": 0.25, "q": 4, "walk_length": 50} for i in range(args.runs): node2vec_random_walk(graph, graph.nodes(), **params) end_time = time.time() - cost_time_avg = (end_time-start_time)/args.runs - print("Run dataset {} {} trials, mean run time: {:.3f}s".format(args.dataset, args.runs, cost_time_avg)) + cost_time_avg = (end_time - start_time) / args.runs + print( + "Run dataset {} {} trials, mean run time: {:.3f}s".format( + args.dataset, args.runs, cost_time_avg + ) + ) def train_node2vec(graph, eval_set, args): """ Train node2vec model """ - trainer = Node2vecModel(graph, - embedding_dim=args.embedding_dim, - walk_length=args.walk_length, - p=args.p, - q=args.q, - num_walks=args.num_walks, - eval_set=eval_set, - eval_steps=1, - device=args.device) + trainer = Node2vecModel( + graph, + embedding_dim=args.embedding_dim, + walk_length=args.walk_length, + p=args.p, + q=args.q, + num_walks=args.num_walks, + eval_set=eval_set, + eval_steps=1, + device=args.device, + ) - trainer.train(epochs=args.epochs, batch_size=args.batch_size, learning_rate=0.01) + trainer.train( + epochs=args.epochs, batch_size=args.batch_size, learning_rate=0.01 + ) -if __name__ == '__main__': +if __name__ == "__main__": args = parse_arguments() graph, eval_set = load_graph(args.dataset) - if args.task == 'train': + if args.task == "train": print("Perform training node2vec model") train_node2vec(graph, eval_set, args) - elif args.task == 'time': + elif args.task == "time": print("Timing random walks") time_randomwalk(graph, args) else: - raise ValueError('Task type error!') + raise ValueError("Task type error!") diff --git a/examples/pytorch/node2vec/model.py b/examples/pytorch/node2vec/model.py index 6934b7f7b6f6..b4c6208a8a0e 100644 --- a/examples/pytorch/node2vec/model.py +++ b/examples/pytorch/node2vec/model.py @@ -1,7 +1,8 @@ import torch import torch.nn as nn -from torch.utils.data import DataLoader from sklearn.linear_model import LogisticRegression +from torch.utils.data import DataLoader + from dgl.sampling import node2vec_random_walk @@ -39,8 +40,19 @@ class Node2vec(nn.Module): If omitted, DGL assumes that the neighbors are picked uniformly. 
""" - def __init__(self, g, embedding_dim, walk_length, p, q, num_walks=10, window_size=5, num_negatives=5, - use_sparse=True, weight_name=None): + def __init__( + self, + g, + embedding_dim, + walk_length, + p, + q, + num_walks=10, + window_size=5, + num_negatives=5, + use_sparse=True, + weight_name=None, + ): super(Node2vec, self).__init__() assert walk_length >= window_size @@ -75,13 +87,17 @@ def sample(self, batch): batch = batch.repeat(self.num_walks) # positive - pos_traces = node2vec_random_walk(self.g, batch, self.p, self.q, self.walk_length, self.prob) + pos_traces = node2vec_random_walk( + self.g, batch, self.p, self.q, self.walk_length, self.prob + ) pos_traces = pos_traces.unfold(1, self.window_size, 1) # rolling window pos_traces = pos_traces.contiguous().view(-1, self.window_size) # negative neg_batch = batch.repeat(self.num_negatives) - neg_traces = torch.randint(self.N, (neg_batch.size(0), self.walk_length)) + neg_traces = torch.randint( + self.N, (neg_batch.size(0), self.walk_length) + ) neg_traces = torch.cat([neg_batch.view(-1, 1), neg_traces], dim=-1) neg_traces = neg_traces.unfold(1, self.window_size, 1) # rolling window neg_traces = neg_traces.contiguous().view(-1, self.window_size) @@ -122,7 +138,10 @@ def loss(self, pos_trace, neg_trace): e = 1e-15 # Positive - pos_start, pos_rest = pos_trace[:, 0], pos_trace[:, 1:].contiguous() # start node and following trace + pos_start, pos_rest = ( + pos_trace[:, 0], + pos_trace[:, 1:].contiguous(), + ) # start node and following trace w_start = self.embedding(pos_start).unsqueeze(dim=1) w_rest = self.embedding(pos_rest) pos_out = (w_start * w_rest).sum(dim=-1).view(-1) @@ -154,7 +173,12 @@ def loader(self, batch_size): Node2vec training data loader """ - return DataLoader(torch.arange(self.N), batch_size=batch_size, shuffle=True, collate_fn=self.sample) + return DataLoader( + torch.arange(self.N), + batch_size=batch_size, + shuffle=True, + collate_fn=self.sample, + ) @torch.no_grad() def evaluate(self, x_train, y_train, x_val, y_val): @@ -166,7 +190,9 @@ def evaluate(self, x_train, y_train, x_val, y_val): x_train, y_train = x_train.cpu().numpy(), y_train.cpu().numpy() x_val, y_val = x_val.cpu().numpy(), y_val.cpu().numpy() - lr = LogisticRegression(solver='lbfgs', multi_class='auto', max_iter=150).fit(x_train, y_train) + lr = LogisticRegression( + solver="lbfgs", multi_class="auto", max_iter=150 + ).fit(x_train, y_train) return lr.score(x_val, y_val) @@ -213,26 +239,52 @@ class Node2vecModel(object): device, default 'cpu'. 
""" - def __init__(self, g, embedding_dim, walk_length, p=1.0, q=1.0, num_walks=1, window_size=5, - num_negatives=5, use_sparse=True, weight_name=None, eval_set=None, eval_steps=-1, device='cpu'): - - self.model = Node2vec(g, embedding_dim, walk_length, p, q, num_walks, - window_size, num_negatives, use_sparse, weight_name) + def __init__( + self, + g, + embedding_dim, + walk_length, + p=1.0, + q=1.0, + num_walks=1, + window_size=5, + num_negatives=5, + use_sparse=True, + weight_name=None, + eval_set=None, + eval_steps=-1, + device="cpu", + ): + + self.model = Node2vec( + g, + embedding_dim, + walk_length, + p, + q, + num_walks, + window_size, + num_negatives, + use_sparse, + weight_name, + ) self.g = g self.use_sparse = use_sparse self.eval_steps = eval_steps self.eval_set = eval_set - if device == 'cpu': + if device == "cpu": self.device = device else: - self.device = 'cuda' if torch.cuda.is_available() else 'cpu' + self.device = "cuda" if torch.cuda.is_available() else "cpu" def _train_step(self, model, loader, optimizer, device): model.train() total_loss = 0 for pos_traces, neg_traces in loader: - pos_traces, neg_traces = pos_traces.to(device), neg_traces.to(device) + pos_traces, neg_traces = pos_traces.to(device), neg_traces.to( + device + ) optimizer.zero_grad() loss = model.loss(pos_traces, neg_traces) loss.backward() @@ -265,15 +317,23 @@ def train(self, epochs, batch_size, learning_rate=0.01): self.model = self.model.to(self.device) loader = self.model.loader(batch_size) if self.use_sparse: - optimizer = torch.optim.SparseAdam(list(self.model.parameters()), lr=learning_rate) + optimizer = torch.optim.SparseAdam( + list(self.model.parameters()), lr=learning_rate + ) else: - optimizer = torch.optim.Adam(self.model.parameters(), lr=learning_rate) + optimizer = torch.optim.Adam( + self.model.parameters(), lr=learning_rate + ) for i in range(epochs): loss = self._train_step(self.model, loader, optimizer, self.device) if self.eval_steps > 0: if epochs % self.eval_steps == 0: acc = self._evaluate_step() - print("Epoch: {}, Train Loss: {:.4f}, Val Acc: {:.4f}".format(i, loss, acc)) + print( + "Epoch: {}, Train Loss: {:.4f}, Val Acc: {:.4f}".format( + i, loss, acc + ) + ) def embedding(self, nodes=None): """ diff --git a/examples/pytorch/node2vec/utils.py b/examples/pytorch/node2vec/utils.py index 01e55d677a3a..809422efc4f5 100644 --- a/examples/pytorch/node2vec/utils.py +++ b/examples/pytorch/node2vec/utils.py @@ -1,34 +1,36 @@ import argparse -from dgl.data import CitationGraphDataset -from ogb.nodeproppred import * + from ogb.linkproppred import * +from ogb.nodeproppred import * + +from dgl.data import CitationGraphDataset def load_graph(name): - cite_graphs = ['cora', 'citeseer', 'pubmed'] + cite_graphs = ["cora", "citeseer", "pubmed"] if name in cite_graphs: dataset = CitationGraphDataset(name) graph = dataset[0] nodes = graph.nodes() - y = graph.ndata['label'] - train_mask = graph.ndata['train_mask'] - val_mask = graph.ndata['test_mask'] + y = graph.ndata["label"] + train_mask = graph.ndata["train_mask"] + val_mask = graph.ndata["test_mask"] nodes_train, y_train = nodes[train_mask], y[train_mask] nodes_val, y_val = nodes[val_mask], y[val_mask] eval_set = [(nodes_train, y_train), (nodes_val, y_val)] - elif name.startswith('ogbn'): + elif name.startswith("ogbn"): dataset = DglNodePropPredDataset(name) graph, y = dataset[0] split_nodes = dataset.get_idx_split() nodes = graph.nodes() - train_idx = split_nodes['train'] - val_idx = split_nodes['valid'] + train_idx = split_nodes["train"] + 
val_idx = split_nodes["valid"] nodes_train, y_train = nodes[train_idx], y[train_idx] nodes_val, y_val = nodes[val_idx], y[val_idx] @@ -44,19 +46,19 @@ def parse_arguments(): """ Parse arguments """ - parser = argparse.ArgumentParser(description='Node2vec') - parser.add_argument('--dataset', type=str, default='cora') + parser = argparse.ArgumentParser(description="Node2vec") + parser.add_argument("--dataset", type=str, default="cora") # 'train' for training node2vec model, 'time' for testing speed of random walk - parser.add_argument('--task', type=str, default='train') - parser.add_argument('--runs', type=int, default=10) - parser.add_argument('--device', type=str, default='cpu') - parser.add_argument('--embedding_dim', type=int, default=128) - parser.add_argument('--walk_length', type=int, default=50) - parser.add_argument('--p', type=float, default=0.25) - parser.add_argument('--q', type=float, default=4.0) - parser.add_argument('--num_walks', type=int, default=10) - parser.add_argument('--epochs', type=int, default=100) - parser.add_argument('--batch_size', type=int, default=128) + parser.add_argument("--task", type=str, default="train") + parser.add_argument("--runs", type=int, default=10) + parser.add_argument("--device", type=str, default="cpu") + parser.add_argument("--embedding_dim", type=int, default=128) + parser.add_argument("--walk_length", type=int, default=50) + parser.add_argument("--p", type=float, default=0.25) + parser.add_argument("--q", type=float, default=4.0) + parser.add_argument("--num_walks", type=int, default=10) + parser.add_argument("--epochs", type=int, default=100) + parser.add_argument("--batch_size", type=int, default=128) args = parser.parse_args() diff --git a/examples/pytorch/ogb/cluster-gat/main.py b/examples/pytorch/ogb/cluster-gat/main.py index fb8dee48f2be..5e5869aab908 100644 --- a/examples/pytorch/ogb/cluster-gat/main.py +++ b/examples/pytorch/ogb/cluster-gat/main.py @@ -1,57 +1,73 @@ -import dgl +import argparse +import time from functools import partial + import numpy as np import torch as th import torch.nn as nn import torch.nn.functional as F import torch.optim as optim -from torch.utils.data import DataLoader -import dgl.nn.pytorch as dglnn -import time -import argparse import tqdm from ogb.nodeproppred import DglNodePropPredDataset - from sampler import ClusterIter, subgraph_collate_fn +from torch.utils.data import DataLoader + +import dgl +import dgl.nn.pytorch as dglnn + class GAT(nn.Module): - def __init__(self, - in_feats, - num_heads, - n_hidden, - n_classes, - n_layers, - activation, - dropout=0.): + def __init__( + self, + in_feats, + num_heads, + n_hidden, + n_classes, + n_layers, + activation, + dropout=0.0, + ): super().__init__() self.n_layers = n_layers self.n_hidden = n_hidden self.n_classes = n_classes self.layers = nn.ModuleList() self.num_heads = num_heads - self.layers.append(dglnn.GATConv(in_feats, - n_hidden, - num_heads=num_heads, - feat_drop=dropout, - attn_drop=dropout, - activation=activation, - negative_slope=0.2)) + self.layers.append( + dglnn.GATConv( + in_feats, + n_hidden, + num_heads=num_heads, + feat_drop=dropout, + attn_drop=dropout, + activation=activation, + negative_slope=0.2, + ) + ) for i in range(1, n_layers - 1): - self.layers.append(dglnn.GATConv(n_hidden * num_heads, - n_hidden, - num_heads=num_heads, - feat_drop=dropout, - attn_drop=dropout, - activation=activation, - negative_slope=0.2)) - self.layers.append(dglnn.GATConv(n_hidden * num_heads, - n_classes, - num_heads=num_heads, - feat_drop=dropout, - 
attn_drop=dropout, - activation=None, - negative_slope=0.2)) - + self.layers.append( + dglnn.GATConv( + n_hidden * num_heads, + n_hidden, + num_heads=num_heads, + feat_drop=dropout, + attn_drop=dropout, + activation=activation, + negative_slope=0.2, + ) + ) + self.layers.append( + dglnn.GATConv( + n_hidden * num_heads, + n_classes, + num_heads=num_heads, + feat_drop=dropout, + attn_drop=dropout, + activation=None, + negative_slope=0.2, + ) + ) + def forward(self, g, x): h = x for l, conv in enumerate(self.layers): @@ -72,24 +88,35 @@ def inference(self, g, x, batch_size, device): num_heads = self.num_heads for l, layer in enumerate(self.layers): if l < self.n_layers - 1: - y = th.zeros(g.num_nodes(), self.n_hidden * num_heads if l != len(self.layers) - 1 else self.n_classes) + y = th.zeros( + g.num_nodes(), + self.n_hidden * num_heads + if l != len(self.layers) - 1 + else self.n_classes, + ) else: - y = th.zeros(g.num_nodes(), self.n_hidden if l != len(self.layers) - 1 else self.n_classes) + y = th.zeros( + g.num_nodes(), + self.n_hidden + if l != len(self.layers) - 1 + else self.n_classes, + ) sampler = dgl.dataloading.MultiLayerFullNeighborSampler(1) dataloader = dgl.dataloading.DataLoader( - g, - th.arange(g.num_nodes()), - sampler, - batch_size=batch_size, - shuffle=False, - drop_last=False, - num_workers=args.num_workers) + g, + th.arange(g.num_nodes()), + sampler, + batch_size=batch_size, + shuffle=False, + drop_last=False, + num_workers=args.num_workers, + ) for input_nodes, output_nodes, blocks in tqdm.tqdm(dataloader): block = blocks[0].int().to(device) h = x[input_nodes].to(device) if l < self.n_layers - 1: - h = layer(block, h).flatten(1) + h = layer(block, h).flatten(1) else: h = layer(block, h) h = h.mean(1) @@ -99,12 +126,14 @@ def inference(self, g, x, batch_size, device): x = y return y + def compute_acc(pred, labels): """ Compute the accuracy of prediction given the labels. """ return (th.argmax(pred, dim=1) == labels).float().sum() / len(pred) + def evaluate(model, g, nfeat, labels, val_nid, test_nid, batch_size, device): """ Evaluate the model on the validation set specified by ``val_mask``. 
@@ -119,22 +148,45 @@ def evaluate(model, g, nfeat, labels, val_nid, test_nid, batch_size, device): with th.no_grad(): pred = model.inference(g, nfeat, batch_size, device) model.train() - labels_cpu = labels.to(th.device('cpu')) - return compute_acc(pred[val_nid], labels_cpu[val_nid]), compute_acc(pred[test_nid], labels_cpu[test_nid]), pred + labels_cpu = labels.to(th.device("cpu")) + return ( + compute_acc(pred[val_nid], labels_cpu[val_nid]), + compute_acc(pred[test_nid], labels_cpu[test_nid]), + pred, + ) + def model_param_summary(model): - """ Count the model parameters """ + """Count the model parameters""" cnt = sum(p.numel() for p in model.parameters() if p.requires_grad) print("Total Params {}".format(cnt)) + #### Entry point def run(args, device, data, nfeat): # Unpack data - train_nid, val_nid, test_nid, in_feats, labels, n_classes, g, cluster_iterator = data + ( + train_nid, + val_nid, + test_nid, + in_feats, + labels, + n_classes, + g, + cluster_iterator, + ) = data labels = labels.to(device) # Define model and optimizer - model = GAT(in_feats, args.num_heads, args.num_hidden, n_classes, args.num_layers, F.relu, args.dropout) + model = GAT( + in_feats, + args.num_heads, + args.num_hidden, + n_classes, + args.num_layers, + F.relu, + args.dropout, + ) model_param_summary(model) model = model.to(device) optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd) @@ -153,7 +205,7 @@ def run(args, device, data, nfeat): # blocks. tic_start = time.time() for step, cluster in enumerate(cluster_iterator): - mask = cluster.ndata.pop('train_mask') + mask = cluster.ndata.pop("train_mask") if mask.sum() == 0: continue cluster.edata.pop(dgl.EID) @@ -173,99 +225,156 @@ def run(args, device, data, nfeat): loss.backward() optimizer.step() tic_back = time.time() - iter_load += (tic_step - tic_start) - iter_far += (tic_far - tic_step) - iter_back += (tic_back - tic_far) + iter_load += tic_step - tic_start + iter_far += tic_far - tic_step + iter_back += tic_back - tic_far if step % args.log_every == 0: acc = compute_acc(batch_pred, batch_labels) - gpu_mem_alloc = th.cuda.max_memory_allocated() / 1000000 if th.cuda.is_available() else 0 - print('Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train Acc {:.4f} | GPU {:.1f} MB'.format( - epoch, step, loss.item(), acc.item(), gpu_mem_alloc)) + gpu_mem_alloc = ( + th.cuda.max_memory_allocated() / 1000000 + if th.cuda.is_available() + else 0 + ) + print( + "Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train Acc {:.4f} | GPU {:.1f} MB".format( + epoch, step, loss.item(), acc.item(), gpu_mem_alloc + ) + ) tic_start = time.time() toc = time.time() - print('Epoch Time(s): {:.4f} Load {:.4f} Forward {:.4f} Backward {:.4f}'.format(toc - tic, iter_load, iter_far, iter_back)) + print( + "Epoch Time(s): {:.4f} Load {:.4f} Forward {:.4f} Backward {:.4f}".format( + toc - tic, iter_load, iter_far, iter_back + ) + ) if epoch >= 5: avg += toc - tic if epoch % args.eval_every == 0 and epoch != 0: - eval_acc, test_acc, pred = evaluate(model, g, nfeat, labels, val_nid, test_nid, args.val_batch_size, device) + eval_acc, test_acc, pred = evaluate( + model, + g, + nfeat, + labels, + val_nid, + test_nid, + args.val_batch_size, + device, + ) model = model.to(device) if args.save_pred: - np.savetxt(args.save_pred + '%02d' % epoch, pred.argmax(1).cpu().numpy(), '%d') - print('Eval Acc {:.4f}'.format(eval_acc)) + np.savetxt( + args.save_pred + "%02d" % epoch, + pred.argmax(1).cpu().numpy(), + "%d", + ) + print("Eval Acc {:.4f}".format(eval_acc)) if eval_acc > 
best_eval_acc: best_eval_acc = eval_acc best_test_acc = test_acc - print('Best Eval Acc {:.4f} Test Acc {:.4f}'.format(best_eval_acc, best_test_acc)) - print('Avg epoch time: {}'.format(avg / (epoch - 4))) - return best_test_acc.to(th.device('cpu')) + print( + "Best Eval Acc {:.4f} Test Acc {:.4f}".format( + best_eval_acc, best_test_acc + ) + ) + print("Avg epoch time: {}".format(avg / (epoch - 4))) + return best_test_acc.to(th.device("cpu")) + -if __name__ == '__main__': +if __name__ == "__main__": argparser = argparse.ArgumentParser("multi-gpu training") - argparser.add_argument('--gpu', type=int, default=0, - help="GPU device ID. Use -1 for CPU training") - argparser.add_argument('--num-epochs', type=int, default=20) - argparser.add_argument('--num-hidden', type=int, default=128) - argparser.add_argument('--num-layers', type=int, default=3) - argparser.add_argument('--num-heads', type=int, default=8) - argparser.add_argument('--batch-size', type=int, default=32) - argparser.add_argument('--val-batch-size', type=int, default=2000) - argparser.add_argument('--log-every', type=int, default=20) - argparser.add_argument('--eval-every', type=int, default=1) - argparser.add_argument('--lr', type=float, default=0.001) - argparser.add_argument('--dropout', type=float, default=0.5) - argparser.add_argument('--save-pred', type=str, default='') - argparser.add_argument('--wd', type=float, default=0) - argparser.add_argument('--num_partitions', type=int, default=15000) - argparser.add_argument('--num-workers', type=int, default=0) - argparser.add_argument('--data-cpu', action='store_true', - help="By default the script puts all node features and labels " - "on GPU when using it to save time for data copy. This may " - "be undesired if they cannot fit in GPU memory at once. " - "This flag disables that.") + argparser.add_argument( + "--gpu", + type=int, + default=0, + help="GPU device ID. Use -1 for CPU training", + ) + argparser.add_argument("--num-epochs", type=int, default=20) + argparser.add_argument("--num-hidden", type=int, default=128) + argparser.add_argument("--num-layers", type=int, default=3) + argparser.add_argument("--num-heads", type=int, default=8) + argparser.add_argument("--batch-size", type=int, default=32) + argparser.add_argument("--val-batch-size", type=int, default=2000) + argparser.add_argument("--log-every", type=int, default=20) + argparser.add_argument("--eval-every", type=int, default=1) + argparser.add_argument("--lr", type=float, default=0.001) + argparser.add_argument("--dropout", type=float, default=0.5) + argparser.add_argument("--save-pred", type=str, default="") + argparser.add_argument("--wd", type=float, default=0) + argparser.add_argument("--num_partitions", type=int, default=15000) + argparser.add_argument("--num-workers", type=int, default=0) + argparser.add_argument( + "--data-cpu", + action="store_true", + help="By default the script puts all node features and labels " + "on GPU when using it to save time for data copy. This may " + "be undesired if they cannot fit in GPU memory at once. 
" + "This flag disables that.", + ) args = argparser.parse_args() if args.gpu >= 0: - device = th.device('cuda:%d' % args.gpu) + device = th.device("cuda:%d" % args.gpu) else: - device = th.device('cpu') + device = th.device("cpu") # load ogbn-products data - data = DglNodePropPredDataset(name='ogbn-products') + data = DglNodePropPredDataset(name="ogbn-products") splitted_idx = data.get_idx_split() - train_idx, val_idx, test_idx = splitted_idx['train'], splitted_idx['valid'], splitted_idx['test'] + train_idx, val_idx, test_idx = ( + splitted_idx["train"], + splitted_idx["valid"], + splitted_idx["test"], + ) graph, labels = data[0] labels = labels[:, 0] - print('Total edges before adding self-loop {}'.format(graph.num_edges())) + print("Total edges before adding self-loop {}".format(graph.num_edges())) graph = dgl.remove_self_loop(graph) graph = dgl.add_self_loop(graph) - print('Total edges after adding self-loop {}'.format(graph.num_edges())) + print("Total edges after adding self-loop {}".format(graph.num_edges())) num_nodes = train_idx.shape[0] + val_idx.shape[0] + test_idx.shape[0] assert num_nodes == graph.num_nodes() mask = th.zeros(num_nodes, dtype=th.bool) mask[train_idx] = True - graph.ndata['train_mask'] = mask + graph.ndata["train_mask"] = mask graph.in_degrees(0) graph.out_degrees(0) graph.find_edges(0) cluster_iter_data = ClusterIter( - 'ogbn-products', graph, args.num_partitions, args.batch_size) - cluster_iterator = DataLoader(cluster_iter_data, batch_size=args.batch_size, shuffle=True, - pin_memory=True, num_workers=4, - collate_fn=partial(subgraph_collate_fn, graph)) + "ogbn-products", graph, args.num_partitions, args.batch_size + ) + cluster_iterator = DataLoader( + cluster_iter_data, + batch_size=args.batch_size, + shuffle=True, + pin_memory=True, + num_workers=4, + collate_fn=partial(subgraph_collate_fn, graph), + ) - in_feats = graph.ndata['feat'].shape[1] + in_feats = graph.ndata["feat"].shape[1] n_classes = (labels.max() + 1).item() # Pack data - data = train_idx, val_idx, test_idx, in_feats, labels, n_classes, graph, cluster_iterator + data = ( + train_idx, + val_idx, + test_idx, + in_feats, + labels, + n_classes, + graph, + cluster_iterator, + ) # Run 10 times test_accs = [] - nfeat = graph.ndata.pop('feat').to(device) + nfeat = graph.ndata.pop("feat").to(device) for i in range(10): test_accs.append(run(args, device, data, nfeat)) - print('Average test accuracy:', np.mean(test_accs), '±', np.std(test_accs)) + print( + "Average test accuracy:", np.mean(test_accs), "±", np.std(test_accs) + ) diff --git a/examples/pytorch/ogb/cluster-gat/partition_utils.py b/examples/pytorch/ogb/cluster-gat/partition_utils.py index d9d2e6c8c1ee..9fa55f8804d3 100644 --- a/examples/pytorch/ogb/cluster-gat/partition_utils.py +++ b/examples/pytorch/ogb/cluster-gat/partition_utils.py @@ -3,8 +3,9 @@ import numpy as np import dgl -from dgl.transforms import metis_partition from dgl import backend as F +from dgl.transforms import metis_partition + def get_partition_list(g, psize): p_gs = metis_partition(g, psize) diff --git a/examples/pytorch/ogb/cluster-gat/sampler.py b/examples/pytorch/ogb/cluster-gat/sampler.py index e29716ec4ac6..66f5a1dd76ac 100644 --- a/examples/pytorch/ogb/cluster-gat/sampler.py +++ b/examples/pytorch/ogb/cluster-gat/sampler.py @@ -1,13 +1,14 @@ import os import torch - from partition_utils import * + class ClusterIter(object): - '''The partition sampler given a DGLGraph and partition number. + """The partition sampler given a DGLGraph and partition number. 
The metis is used as the graph partition backend. - ''' + """ + def __init__(self, dn, g, psize, batch_size): """Initialize the sampler. @@ -26,11 +27,11 @@ def __init__(self, dn, g, psize, batch_size): self.batch_size = batch_size # cache the partitions of known datasets&partition number if dn: - fn = os.path.join('./datasets/', dn + '_{}.npy'.format(psize)) + fn = os.path.join("./datasets/", dn + "_{}.npy".format(psize)) if os.path.exists(fn): self.par_li = np.load(fn, allow_pickle=True) else: - os.makedirs('./datasets/', exist_ok=True) + os.makedirs("./datasets/", exist_ok=True) self.par_li = get_partition_list(g, psize) np.save(fn, self.par_li) else: @@ -47,6 +48,7 @@ def __len__(self): def __getitem__(self, idx): return self.par_li[idx] + def subgraph_collate_fn(g, batch): nids = np.concatenate(batch).reshape(-1).astype(np.int64) g1 = g.subgraph(nids) diff --git a/examples/pytorch/ogb/cluster-sage/main.py b/examples/pytorch/ogb/cluster-sage/main.py index ed4305a3b62a..42c90c9b814a 100644 --- a/examples/pytorch/ogb/cluster-sage/main.py +++ b/examples/pytorch/ogb/cluster-sage/main.py @@ -1,42 +1,40 @@ -import dgl +import argparse +import time +import traceback +from functools import partial + import numpy as np import torch as th +import torch.multiprocessing as mp import torch.nn as nn import torch.nn.functional as F import torch.optim as optim -import torch.multiprocessing as mp +import tqdm +from ogb.nodeproppred import DglNodePropPredDataset +from sampler import ClusterIter, subgraph_collate_fn from torch.utils.data import DataLoader + +import dgl import dgl.function as fn import dgl.nn.pytorch as dglnn -import time -import argparse from dgl.data import RedditDataset -import tqdm -import traceback -from ogb.nodeproppred import DglNodePropPredDataset -from functools import partial - -from sampler import ClusterIter, subgraph_collate_fn #### Neighbor sampler + class SAGE(nn.Module): - def __init__(self, - in_feats, - n_hidden, - n_classes, - n_layers, - activation, - dropout): + def __init__( + self, in_feats, n_hidden, n_classes, n_layers, activation, dropout + ): super().__init__() self.n_layers = n_layers self.n_hidden = n_hidden self.n_classes = n_classes self.layers = nn.ModuleList() - self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, 'mean')) + self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, "mean")) for i in range(1, n_layers - 1): - self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, 'mean')) - self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, 'mean')) + self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, "mean")) + self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, "mean")) self.dropout = nn.Dropout(dropout) self.activation = activation @@ -70,12 +68,14 @@ def inference(self, g, x, batch_size, device): return h + def compute_acc(pred, labels): """ Compute the accuracy of prediction given the labels. """ return (th.argmax(pred, dim=1) == labels).float().sum() / len(pred) + def evaluate(model, g, labels, val_nid, test_nid, batch_size, device): """ Evaluate the model on the validation set specified by ``val_mask``. 
@@ -88,28 +88,49 @@ def evaluate(model, g, labels, val_nid, test_nid, batch_size, device): """ model.eval() with th.no_grad(): - inputs = g.ndata['feat'] + inputs = g.ndata["feat"] model = model.cpu() pred = model.inference(g, inputs, batch_size, device) model.train() - return compute_acc(pred[val_nid], labels[val_nid]), compute_acc(pred[test_nid], labels[test_nid]), pred + return ( + compute_acc(pred[val_nid], labels[val_nid]), + compute_acc(pred[test_nid], labels[test_nid]), + pred, + ) + def load_subtensor(g, labels, seeds, input_nodes, device): """ Copys features and labels of a set of nodes onto GPU. """ - batch_inputs = g.ndata['feat'][input_nodes].to(device) + batch_inputs = g.ndata["feat"][input_nodes].to(device) batch_labels = labels[seeds].to(device) return batch_inputs, batch_labels + #### Entry point def run(args, device, data): # Unpack data - train_nid, val_nid, test_nid, in_feats, labels, n_classes, g, cluster_iterator = data - + ( + train_nid, + val_nid, + test_nid, + in_feats, + labels, + n_classes, + g, + cluster_iterator, + ) = data # Define model and optimizer - model = SAGE(in_feats, args.num_hidden, n_classes, args.num_layers, F.relu, args.dropout) + model = SAGE( + in_feats, + args.num_hidden, + n_classes, + args.num_layers, + F.relu, + args.dropout, + ) model = model.to(device) loss_fcn = nn.CrossEntropyLoss() loss_fcn = loss_fcn.to(device) @@ -132,11 +153,11 @@ def run(args, device, data): tic_start = time.time() for step, cluster in enumerate(cluster_iterator): cluster = cluster.int().to(device) - mask = cluster.ndata['train_mask'].to(device) + mask = cluster.ndata["train_mask"].to(device) if mask.sum() == 0: continue - feat = cluster.ndata['feat'].to(device) - batch_labels = cluster.ndata['labels'].to(device) + feat = cluster.ndata["feat"].to(device) + batch_labels = cluster.ndata["labels"].to(device) tic_step = time.time() batch_pred = model(cluster, feat) @@ -148,94 +169,147 @@ def run(args, device, data): loss.backward() optimizer.step() tic_back = time.time() - iter_load += (tic_step - tic_start) - iter_far += (tic_far - tic_step) - iter_back += (tic_back - tic_far) + iter_load += tic_step - tic_start + iter_far += tic_far - tic_step + iter_back += tic_back - tic_far tic_start = time.time() if step % args.log_every == 0: acc = compute_acc(batch_pred, batch_labels) - gpu_mem_alloc = th.cuda.max_memory_allocated() / 1000000 if th.cuda.is_available() else 0 - print('Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train Acc {:.4f} | GPU {:.1f} MB'.format( - epoch, step, loss.item(), acc.item(), gpu_mem_alloc)) + gpu_mem_alloc = ( + th.cuda.max_memory_allocated() / 1000000 + if th.cuda.is_available() + else 0 + ) + print( + "Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train Acc {:.4f} | GPU {:.1f} MB".format( + epoch, step, loss.item(), acc.item(), gpu_mem_alloc + ) + ) toc = time.time() - print('Epoch Time(s): {:.4f} Load {:.4f} Forward {:.4f} Backward {:.4f}'.format(toc - tic, iter_load, iter_far, iter_back)) + print( + "Epoch Time(s): {:.4f} Load {:.4f} Forward {:.4f} Backward {:.4f}".format( + toc - tic, iter_load, iter_far, iter_back + ) + ) if epoch >= 5: avg += toc - tic if epoch % args.eval_every == 0 and epoch != 0: - eval_acc, test_acc, pred = evaluate(model, g, labels, val_nid, test_nid, args.val_batch_size, device) + eval_acc, test_acc, pred = evaluate( + model, g, labels, val_nid, test_nid, args.val_batch_size, device + ) model = model.to(device) if args.save_pred: - np.savetxt(args.save_pred + '%02d' % epoch, pred.argmax(1).cpu().numpy(), '%d') - 
print('Eval Acc {:.4f}'.format(eval_acc)) + np.savetxt( + args.save_pred + "%02d" % epoch, + pred.argmax(1).cpu().numpy(), + "%d", + ) + print("Eval Acc {:.4f}".format(eval_acc)) if eval_acc > best_eval_acc: best_eval_acc = eval_acc best_test_acc = test_acc - print('Best Eval Acc {:.4f} Test Acc {:.4f}'.format(best_eval_acc, best_test_acc)) - print('Avg epoch time: {}'.format(avg / (epoch - 4))) + print( + "Best Eval Acc {:.4f} Test Acc {:.4f}".format( + best_eval_acc, best_test_acc + ) + ) + print("Avg epoch time: {}".format(avg / (epoch - 4))) return best_test_acc -if __name__ == '__main__': + +if __name__ == "__main__": argparser = argparse.ArgumentParser("multi-gpu training") - argparser.add_argument('--gpu', type=int, default=0, - help="GPU device ID. Use -1 for CPU training") - argparser.add_argument('--num-epochs', type=int, default=30) - argparser.add_argument('--num-hidden', type=int, default=256) - argparser.add_argument('--num-layers', type=int, default=3) - argparser.add_argument('--batch-size', type=int, default=32) - argparser.add_argument('--val-batch-size', type=int, default=10000) - argparser.add_argument('--log-every', type=int, default=20) - argparser.add_argument('--eval-every', type=int, default=1) - argparser.add_argument('--lr', type=float, default=0.001) - argparser.add_argument('--dropout', type=float, default=0.5) - argparser.add_argument('--save-pred', type=str, default='') - argparser.add_argument('--wd', type=float, default=0) - argparser.add_argument('--num_partitions', type=int, default=15000) + argparser.add_argument( + "--gpu", + type=int, + default=0, + help="GPU device ID. Use -1 for CPU training", + ) + argparser.add_argument("--num-epochs", type=int, default=30) + argparser.add_argument("--num-hidden", type=int, default=256) + argparser.add_argument("--num-layers", type=int, default=3) + argparser.add_argument("--batch-size", type=int, default=32) + argparser.add_argument("--val-batch-size", type=int, default=10000) + argparser.add_argument("--log-every", type=int, default=20) + argparser.add_argument("--eval-every", type=int, default=1) + argparser.add_argument("--lr", type=float, default=0.001) + argparser.add_argument("--dropout", type=float, default=0.5) + argparser.add_argument("--save-pred", type=str, default="") + argparser.add_argument("--wd", type=float, default=0) + argparser.add_argument("--num_partitions", type=int, default=15000) args = argparser.parse_args() - + if args.gpu >= 0: - device = th.device('cuda:%d' % args.gpu) + device = th.device("cuda:%d" % args.gpu) else: - device = th.device('cpu') + device = th.device("cpu") # load ogbn-products data - data = DglNodePropPredDataset(name='ogbn-products') + data = DglNodePropPredDataset(name="ogbn-products") splitted_idx = data.get_idx_split() - train_idx, val_idx, test_idx = splitted_idx['train'], splitted_idx['valid'], splitted_idx['test'] + train_idx, val_idx, test_idx = ( + splitted_idx["train"], + splitted_idx["valid"], + splitted_idx["test"], + ) graph, labels = data[0] labels = labels[:, 0] num_nodes = train_idx.shape[0] + val_idx.shape[0] + test_idx.shape[0] assert num_nodes == graph.number_of_nodes() - graph.ndata['labels'] = labels + graph.ndata["labels"] = labels mask = th.zeros(num_nodes, dtype=th.bool) mask[train_idx] = True - graph.ndata['train_mask'] = mask + graph.ndata["train_mask"] = mask mask = th.zeros(num_nodes, dtype=th.bool) mask[val_idx] = True - graph.ndata['valid_mask'] = mask + graph.ndata["valid_mask"] = mask mask = th.zeros(num_nodes, dtype=th.bool) 
mask[test_idx] = True - graph.ndata['test_mask'] = mask + graph.ndata["test_mask"] = mask graph.in_degree(0) graph.out_degree(0) graph.find_edges(0) cluster_iter_data = ClusterIter( - 'ogbn-products', graph, args.num_partitions, args.batch_size, th.cat([train_idx, val_idx, test_idx])) + "ogbn-products", + graph, + args.num_partitions, + args.batch_size, + th.cat([train_idx, val_idx, test_idx]), + ) idx = th.arange(args.num_partitions // args.batch_size) - cluster_iterator = DataLoader(cluster_iter_data, batch_size=32, shuffle=True, pin_memory=True, num_workers=4, collate_fn=partial(subgraph_collate_fn, graph)) + cluster_iterator = DataLoader( + cluster_iter_data, + batch_size=32, + shuffle=True, + pin_memory=True, + num_workers=4, + collate_fn=partial(subgraph_collate_fn, graph), + ) - in_feats = graph.ndata['feat'].shape[1] + in_feats = graph.ndata["feat"].shape[1] print(in_feats) n_classes = (labels.max() + 1).item() # Pack data - data = train_idx, val_idx, test_idx, in_feats, labels, n_classes, graph, cluster_iterator + data = ( + train_idx, + val_idx, + test_idx, + in_feats, + labels, + n_classes, + graph, + cluster_iterator, + ) # Run 10 times test_accs = [] for i in range(10): test_accs.append(run(args, device, data)) - print('Average test accuracy:', np.mean(test_accs), '±', np.std(test_accs)) + print( + "Average test accuracy:", np.mean(test_accs), "±", np.std(test_accs) + ) diff --git a/examples/pytorch/ogb/cluster-sage/partition_utils.py b/examples/pytorch/ogb/cluster-sage/partition_utils.py index d9d2e6c8c1ee..9fa55f8804d3 100644 --- a/examples/pytorch/ogb/cluster-sage/partition_utils.py +++ b/examples/pytorch/ogb/cluster-sage/partition_utils.py @@ -3,8 +3,9 @@ import numpy as np import dgl -from dgl.transforms import metis_partition from dgl import backend as F +from dgl.transforms import metis_partition + def get_partition_list(g, psize): p_gs = metis_partition(g, psize) diff --git a/examples/pytorch/ogb/cluster-sage/sampler.py b/examples/pytorch/ogb/cluster-sage/sampler.py index 3a0caab7de73..4b30cd0a3aa1 100644 --- a/examples/pytorch/ogb/cluster-sage/sampler.py +++ b/examples/pytorch/ogb/cluster-sage/sampler.py @@ -1,17 +1,18 @@ import os import random - -import dgl.function as fn -import torch import time +import torch from partition_utils import * +import dgl.function as fn + class ClusterIter(object): - '''The partition sampler given a DGLGraph and partition number. + """The partition sampler given a DGLGraph and partition number. The metis is used as the graph partition backend. - ''' + """ + def __init__(self, dn, g, psize, batch_size, seed_nid): """Initialize the sampler. @@ -32,11 +33,11 @@ def __init__(self, dn, g, psize, batch_size, seed_nid): self.batch_size = batch_size # cache the partitions of known datasets&partition number if dn: - fn = os.path.join('./datasets/', dn + '_{}.npy'.format(psize)) + fn = os.path.join("./datasets/", dn + "_{}.npy".format(psize)) if os.path.exists(fn): self.par_li = np.load(fn, allow_pickle=True) else: - os.makedirs('./datasets/', exist_ok=True) + os.makedirs("./datasets/", exist_ok=True) self.par_li = get_partition_list(g, psize) np.save(fn, self.par_li) else: @@ -49,9 +50,9 @@ def __init__(self, dn, g, psize, batch_size, seed_nid): # use one side normalization def get_norm(self, g): - norm = 1. 
/ g.in_degrees().float().unsqueeze(1) + norm = 1.0 / g.in_degrees().float().unsqueeze(1) norm[torch.isinf(norm)] = 0 - norm = norm.to(self.g.ndata['feat'].device) + norm = norm.to(self.g.ndata["feat"].device) return norm def __len__(self): @@ -60,6 +61,7 @@ def __len__(self): def __getitem__(self, idx): return self.par_li[idx] + def subgraph_collate_fn(g, batch): nids = np.concatenate(batch).reshape(-1).astype(np.int64) g1 = g.subgraph(nids) diff --git a/examples/pytorch/ogb/deepwalk/deepwalk.py b/examples/pytorch/ogb/deepwalk/deepwalk.py index e3cedc9ca9e0..a17cda8707f0 100644 --- a/examples/pytorch/ogb/deepwalk/deepwalk.py +++ b/examples/pytorch/ogb/deepwalk/deepwalk.py @@ -1,20 +1,22 @@ -import torch import argparse -import dgl -import torch.multiprocessing as mp -from torch.utils.data import DataLoader import os import random import time -import numpy as np -from reading_data import DeepwalkDataset +import numpy as np +import torch +import torch.multiprocessing as mp from model import SkipGramModel +from reading_data import DeepwalkDataset +from torch.utils.data import DataLoader from utils import shuffle_walks, sum_up_params +import dgl + + class DeepwalkTrainer: def __init__(self, args): - """ Initializing the trainer with the input arguments """ + """Initializing the trainer with the input arguments""" self.args = args self.dataset = DeepwalkDataset( net_file=args.data_file, @@ -28,20 +30,22 @@ def __init__(self, args): fast_neg=args.fast_neg, ogbl_name=args.ogbl_name, load_from_ogbl=args.load_from_ogbl, - ) + ) self.emb_size = self.dataset.G.number_of_nodes() self.emb_model = None def init_device_emb(self): - """ set the device before training + """set the device before training will be called once in fast_train_mp / fast_train """ choices = sum([self.args.only_gpu, self.args.only_cpu, self.args.mix]) - assert choices == 1, "Must choose only *one* training mode in [only_cpu, only_gpu, mix]" - + assert ( + choices == 1 + ), "Must choose only *one* training mode in [only_cpu, only_gpu, mix]" + # initializing embedding on CPU self.emb_model = SkipGramModel( - emb_size=self.emb_size, + emb_size=self.emb_size, emb_dimension=self.args.dim, walk_length=self.args.walk_length, window_size=self.args.window_size, @@ -59,8 +63,8 @@ def init_device_emb(self): use_context_weight=self.args.use_context_weight, async_update=self.args.async_update, num_threads=self.args.num_threads, - ) - + ) + torch.set_num_threads(self.args.num_threads) if self.args.only_gpu: print("Run in 1 GPU") @@ -69,22 +73,23 @@ def init_device_emb(self): elif self.args.mix: print("Mix CPU with %d GPU" % len(self.args.gpus)) if len(self.args.gpus) == 1: - assert self.args.gpus[0] >= 0, 'mix CPU with GPU should have available GPU' + assert ( + self.args.gpus[0] >= 0 + ), "mix CPU with GPU should have available GPU" self.emb_model.set_device(self.args.gpus[0]) else: print("Run in CPU process") - self.args.gpus = [torch.device('cpu')] - + self.args.gpus = [torch.device("cpu")] def train(self): - """ train the embedding """ + """train the embedding""" if len(self.args.gpus) > 1: self.fast_train_mp() else: self.fast_train() def fast_train_mp(self): - """ multi-cpu-core or mix cpu & multi-gpu """ + """multi-cpu-core or mix cpu & multi-gpu""" self.init_device_emb() self.emb_model.share_memory() @@ -95,26 +100,34 @@ def fast_train_mp(self): ps = [] for i in range(len(self.args.gpus)): - p = mp.Process(target=self.fast_train_sp, args=(i, self.args.gpus[i])) + p = mp.Process( + target=self.fast_train_sp, args=(i, self.args.gpus[i]) + ) 
ps.append(p) p.start() for p in ps: p.join() - - print("Used time: %.2fs" % (time.time()-start_all)) + + print("Used time: %.2fs" % (time.time() - start_all)) if self.args.save_in_txt: - self.emb_model.save_embedding_txt(self.dataset, self.args.output_emb_file) + self.emb_model.save_embedding_txt( + self.dataset, self.args.output_emb_file + ) elif self.args.save_in_pt: - self.emb_model.save_embedding_pt(self.dataset, self.args.output_emb_file) + self.emb_model.save_embedding_pt( + self.dataset, self.args.output_emb_file + ) else: - self.emb_model.save_embedding(self.dataset, self.args.output_emb_file) + self.emb_model.save_embedding( + self.dataset, self.args.output_emb_file + ) def fast_train_sp(self, rank, gpu_id): - """ a subprocess for fast_train_mp """ + """a subprocess for fast_train_mp""" if self.args.mix: self.emb_model.set_device(gpu_id) - + torch.set_num_threads(self.args.num_threads) if self.args.async_update: self.emb_model.create_async_update() @@ -128,13 +141,18 @@ def fast_train_sp(self, rank, gpu_id): shuffle=False, drop_last=False, num_workers=self.args.num_sampler_threads, - ) + ) num_batches = len(dataloader) - print("num batchs: %d in process [%d] GPU [%d]" % (num_batches, rank, gpu_id)) + print( + "num batchs: %d in process [%d] GPU [%d]" + % (num_batches, rank, gpu_id) + ) # number of positive node pairs in a sequence - num_pos = int(2 * self.args.walk_length * self.args.window_size\ - - self.args.window_size * (self.args.window_size + 1)) - + num_pos = int( + 2 * self.args.walk_length * self.args.window_size + - self.args.window_size * (self.args.window_size + 1) + ) + start = time.time() with torch.no_grad(): for i, walks in enumerate(dataloader): @@ -144,28 +162,44 @@ def fast_train_sp(self, rank, gpu_id): # do negative sampling bs = len(walks) neg_nodes = torch.LongTensor( - np.random.choice(self.dataset.neg_table, - bs * num_pos * self.args.negative, - replace=True)) + np.random.choice( + self.dataset.neg_table, + bs * num_pos * self.args.negative, + replace=True, + ) + ) self.emb_model.fast_learn(walks, neg_nodes=neg_nodes) if i > 0 and i % self.args.print_interval == 0: if self.args.print_loss: - print("GPU-[%d] batch %d time: %.2fs loss: %.4f" \ - % (gpu_id, i, time.time()-start, -sum(self.emb_model.loss)/self.args.print_interval)) + print( + "GPU-[%d] batch %d time: %.2fs loss: %.4f" + % ( + gpu_id, + i, + time.time() - start, + -sum(self.emb_model.loss) + / self.args.print_interval, + ) + ) self.emb_model.loss = [] else: - print("GPU-[%d] batch %d time: %.2fs" % (gpu_id, i, time.time()-start)) + print( + "GPU-[%d] batch %d time: %.2fs" + % (gpu_id, i, time.time() - start) + ) start = time.time() if self.args.async_update: self.emb_model.finish_async_update() def fast_train(self): - """ fast train with dataloader with only gpu / only cpu""" + """fast train with dataloader with only gpu / only cpu""" # the number of postive node pairs of a node sequence - num_pos = 2 * self.args.walk_length * self.args.window_size\ + num_pos = ( + 2 * self.args.walk_length * self.args.window_size - self.args.window_size * (self.args.window_size + 1) + ) num_pos = int(num_pos) self.init_device_emb() @@ -186,8 +220,8 @@ def fast_train(self): shuffle=False, drop_last=False, num_workers=self.args.num_sampler_threads, - ) - + ) + num_batches = len(dataloader) print("num batchs: %d\n" % num_batches) @@ -202,109 +236,228 @@ def fast_train(self): # do negative sampling bs = len(walks) neg_nodes = torch.LongTensor( - np.random.choice(self.dataset.neg_table, - bs * num_pos * 
self.args.negative, - replace=True)) + np.random.choice( + self.dataset.neg_table, + bs * num_pos * self.args.negative, + replace=True, + ) + ) self.emb_model.fast_learn(walks, neg_nodes=neg_nodes) if i > 0 and i % self.args.print_interval == 0: if self.args.print_loss: - print("Batch %d training time: %.2fs loss: %.4f" \ - % (i, time.time()-start, -sum(self.emb_model.loss)/self.args.print_interval)) + print( + "Batch %d training time: %.2fs loss: %.4f" + % ( + i, + time.time() - start, + -sum(self.emb_model.loss) + / self.args.print_interval, + ) + ) self.emb_model.loss = [] else: - print("Batch %d, training time: %.2fs" % (i, time.time()-start)) + print( + "Batch %d, training time: %.2fs" + % (i, time.time() - start) + ) start = time.time() if self.args.async_update: self.emb_model.finish_async_update() - print("Training used time: %.2fs" % (time.time()-start_all)) + print("Training used time: %.2fs" % (time.time() - start_all)) if self.args.save_in_txt: - self.emb_model.save_embedding_txt(self.dataset, self.args.output_emb_file) + self.emb_model.save_embedding_txt( + self.dataset, self.args.output_emb_file + ) elif self.args.save_in_pt: - self.emb_model.save_embedding_pt(self.dataset, self.args.output_emb_file) + self.emb_model.save_embedding_pt( + self.dataset, self.args.output_emb_file + ) else: - self.emb_model.save_embedding(self.dataset, self.args.output_emb_file) + self.emb_model.save_embedding( + self.dataset, self.args.output_emb_file + ) -if __name__ == '__main__': + +if __name__ == "__main__": parser = argparse.ArgumentParser(description="DeepWalk") # input files ## personal datasets - parser.add_argument('--data_file', type=str, - help="path of the txt network file, builtin dataset include youtube-net and blog-net") + parser.add_argument( + "--data_file", + type=str, + help="path of the txt network file, builtin dataset include youtube-net and blog-net", + ) ## ogbl datasets - parser.add_argument('--ogbl_name', type=str, - help="name of ogbl dataset, e.g. ogbl-ddi") - parser.add_argument('--load_from_ogbl', default=False, action="store_true", - help="whether load dataset from ogbl") + parser.add_argument( + "--ogbl_name", type=str, help="name of ogbl dataset, e.g. 
ogbl-ddi" + ) + parser.add_argument( + "--load_from_ogbl", + default=False, + action="store_true", + help="whether load dataset from ogbl", + ) # output files - parser.add_argument('--save_in_txt', default=False, action="store_true", - help='Whether save dat in txt format or npy') - parser.add_argument('--save_in_pt', default=False, action="store_true", - help='Whether save dat in pt format or npy') - parser.add_argument('--output_emb_file', type=str, default="emb.npy", - help='path of the output npy embedding file') - parser.add_argument('--map_file', type=str, default="nodeid_to_index.pickle", - help='path of the mapping dict that maps node ids to embedding index') - parser.add_argument('--norm', default=False, action="store_true", - help="whether to do normalization over node embedding after training") - + parser.add_argument( + "--save_in_txt", + default=False, + action="store_true", + help="Whether save dat in txt format or npy", + ) + parser.add_argument( + "--save_in_pt", + default=False, + action="store_true", + help="Whether save dat in pt format or npy", + ) + parser.add_argument( + "--output_emb_file", + type=str, + default="emb.npy", + help="path of the output npy embedding file", + ) + parser.add_argument( + "--map_file", + type=str, + default="nodeid_to_index.pickle", + help="path of the mapping dict that maps node ids to embedding index", + ) + parser.add_argument( + "--norm", + default=False, + action="store_true", + help="whether to do normalization over node embedding after training", + ) + # model parameters - parser.add_argument('--dim', default=128, type=int, - help="embedding dimensions") - parser.add_argument('--window_size', default=5, type=int, - help="context window size") - parser.add_argument('--use_context_weight', default=False, action="store_true", - help="whether to add weights over nodes in the context window") - parser.add_argument('--num_walks', default=10, type=int, - help="number of walks for each node") - parser.add_argument('--negative', default=1, type=int, - help="negative samples for each positve node pair") - parser.add_argument('--batch_size', default=128, type=int, - help="number of node sequences in each batch") - parser.add_argument('--walk_length', default=80, type=int, - help="number of nodes in a sequence") - parser.add_argument('--neg_weight', default=1., type=float, - help="negative weight") - parser.add_argument('--lap_norm', default=0.01, type=float, - help="weight of laplacian normalization, recommend to set as 0.1 / windoe_size") - + parser.add_argument( + "--dim", default=128, type=int, help="embedding dimensions" + ) + parser.add_argument( + "--window_size", default=5, type=int, help="context window size" + ) + parser.add_argument( + "--use_context_weight", + default=False, + action="store_true", + help="whether to add weights over nodes in the context window", + ) + parser.add_argument( + "--num_walks", + default=10, + type=int, + help="number of walks for each node", + ) + parser.add_argument( + "--negative", + default=1, + type=int, + help="negative samples for each positve node pair", + ) + parser.add_argument( + "--batch_size", + default=128, + type=int, + help="number of node sequences in each batch", + ) + parser.add_argument( + "--walk_length", + default=80, + type=int, + help="number of nodes in a sequence", + ) + parser.add_argument( + "--neg_weight", default=1.0, type=float, help="negative weight" + ) + parser.add_argument( + "--lap_norm", + default=0.01, + type=float, + help="weight of laplacian normalization, recommend 
to set as 0.1 / windoe_size", + ) + # training parameters - parser.add_argument('--print_interval', default=100, type=int, - help="number of batches between printing") - parser.add_argument('--print_loss', default=False, action="store_true", - help="whether print loss during training") - parser.add_argument('--lr', default=0.2, type=float, - help="learning rate") - + parser.add_argument( + "--print_interval", + default=100, + type=int, + help="number of batches between printing", + ) + parser.add_argument( + "--print_loss", + default=False, + action="store_true", + help="whether print loss during training", + ) + parser.add_argument("--lr", default=0.2, type=float, help="learning rate") + # optimization settings - parser.add_argument('--mix', default=False, action="store_true", - help="mixed training with CPU and GPU") - parser.add_argument('--gpus', type=int, default=[-1], nargs='+', - help='a list of active gpu ids, e.g. 0, used with --mix') - parser.add_argument('--only_cpu', default=False, action="store_true", - help="training with CPU") - parser.add_argument('--only_gpu', default=False, action="store_true", - help="training with GPU") - parser.add_argument('--async_update', default=False, action="store_true", - help="mixed training asynchronously, not recommended") - - parser.add_argument('--true_neg', default=False, action="store_true", - help="If not specified, this program will use " - "a faster negative sampling method, " - "but the samples might be false negative " - "with a small probability. If specified, " - "this program will generate a true negative sample table," - "and select from it when doing negative samling") - parser.add_argument('--num_threads', default=8, type=int, - help="number of threads used for each CPU-core/GPU") - parser.add_argument('--num_sampler_threads', default=2, type=int, - help="number of threads used for sampling") - - parser.add_argument('--count_params', default=False, action="store_true", - help="count the params, exit once counting over") + parser.add_argument( + "--mix", + default=False, + action="store_true", + help="mixed training with CPU and GPU", + ) + parser.add_argument( + "--gpus", + type=int, + default=[-1], + nargs="+", + help="a list of active gpu ids, e.g. 0, used with --mix", + ) + parser.add_argument( + "--only_cpu", + default=False, + action="store_true", + help="training with CPU", + ) + parser.add_argument( + "--only_gpu", + default=False, + action="store_true", + help="training with GPU", + ) + parser.add_argument( + "--async_update", + default=False, + action="store_true", + help="mixed training asynchronously, not recommended", + ) + + parser.add_argument( + "--true_neg", + default=False, + action="store_true", + help="If not specified, this program will use " + "a faster negative sampling method, " + "but the samples might be false negative " + "with a small probability. 
If specified, " + "this program will generate a true negative sample table," + "and select from it when doing negative samling", + ) + parser.add_argument( + "--num_threads", + default=8, + type=int, + help="number of threads used for each CPU-core/GPU", + ) + parser.add_argument( + "--num_sampler_threads", + default=2, + type=int, + help="number of threads used for sampling", + ) + + parser.add_argument( + "--count_params", + default=False, + action="store_true", + help="count the params, exit once counting over", + ) args = parser.parse_args() args.fast_neg = not args.true_neg diff --git a/examples/pytorch/ogb/deepwalk/load_dataset.py b/examples/pytorch/ogb/deepwalk/load_dataset.py index defa6543e879..8b3dac5c89c6 100644 --- a/examples/pytorch/ogb/deepwalk/load_dataset.py +++ b/examples/pytorch/ogb/deepwalk/load_dataset.py @@ -1,21 +1,27 @@ """ load dataset from ogb """ import argparse -from ogb.linkproppred import DglLinkPropPredDataset import time -def load_from_ogbl_with_name(name): - choices = ['ogbl-collab', 'ogbl-ddi', 'ogbl-ppa', 'ogbl-citation'] +from ogb.linkproppred import DglLinkPropPredDataset + + +def load_from_ogbl_with_name(name): + choices = ["ogbl-collab", "ogbl-ddi", "ogbl-ppa", "ogbl-citation"] assert name in choices, "name must be selected from " + str(choices) dataset = DglLinkPropPredDataset(name) return dataset[0] + if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument('--name', type=str, - choices=['ogbl-collab', 'ogbl-ddi', 'ogbl-ppa', 'ogbl-citation'], - default='ogbl-collab', - help="name of datasets by ogb") + parser.add_argument( + "--name", + type=str, + choices=["ogbl-collab", "ogbl-ddi", "ogbl-ppa", "ogbl-citation"], + default="ogbl-collab", + help="name of datasets by ogb", + ) args = parser.parse_args() print("loading graph... 
it might take some time") @@ -23,28 +29,32 @@ def load_from_ogbl_with_name(name): g = load_from_ogbl_with_name(name=name) try: - w = g.edata['edge_weight'] + w = g.edata["edge_weight"] weighted = True except: weighted = False - edge_num = g.edges()[0].shape[0] src = list(g.edges()[0]) tgt = list(g.edges()[1]) if weighted: - weight = list(g.edata['edge_weight']) + weight = list(g.edata["edge_weight"]) print("writing...") start_time = time.time() with open(name + "-net.txt", "w") as f: for i in range(edge_num): if weighted: - f.write(str(src[i].item()) + " "\ - +str(tgt[i].item()) + " "\ - +str(weight[i].item()) + "\n") + f.write( + str(src[i].item()) + + " " + + str(tgt[i].item()) + + " " + + str(weight[i].item()) + + "\n" + ) else: - f.write(str(src[i].item()) + " "\ - +str(tgt[i].item()) + " "\ - +"1\n") - print("writing used time: %d s" % int(time.time() - start_time)) \ No newline at end of file + f.write( + str(src[i].item()) + " " + str(tgt[i].item()) + " " + "1\n" + ) + print("writing used time: %d s" % int(time.time() - start_time)) diff --git a/examples/pytorch/ogb/deepwalk/model.py b/examples/pytorch/ogb/deepwalk/model.py index 466b10978348..bb701901866f 100644 --- a/examples/pytorch/ogb/deepwalk/model.py +++ b/examples/pytorch/ogb/deepwalk/model.py @@ -1,16 +1,17 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch.nn import init import random + import numpy as np +import torch import torch.multiprocessing as mp +import torch.nn as nn +import torch.nn.functional as F from torch.multiprocessing import Queue +from torch.nn import init def init_emb2pos_index(walk_length, window_size, batch_size): - ''' select embedding of positive nodes from a batch of node embeddings - + """select embedding of positive nodes from a batch of node embeddings + Return ------ index_emb_posu torch.LongTensor : the indices of u_embeddings @@ -20,12 +21,12 @@ def init_emb2pos_index(walk_length, window_size, batch_size): ----- # emb_u.shape: [batch_size * walk_length, dim] batch_emb2posu = torch.index_select(emb_u, 0, index_emb_posu) - ''' + """ idx_list_u = [] idx_list_v = [] for b in range(batch_size): for i in range(walk_length): - for j in range(i-window_size, i): + for j in range(i - window_size, i): if j >= 0: idx_list_u.append(j + b * walk_length) idx_list_v.append(i + b * walk_length) @@ -40,10 +41,11 @@ def init_emb2pos_index(walk_length, window_size, batch_size): return index_emb_posu, index_emb_posv + def init_emb2neg_index(walk_length, window_size, negative, batch_size): - '''select embedding of negative nodes from a batch of node embeddings + """select embedding of negative nodes from a batch of node embeddings for fast negative sampling - + Return ------ index_emb_negu torch.LongTensor : the indices of u_embeddings @@ -53,21 +55,22 @@ def init_emb2neg_index(walk_length, window_size, negative, batch_size): ----- # emb_u.shape: [batch_size * walk_length, dim] batch_emb2negu = torch.index_select(emb_u, 0, index_emb_negu) - ''' + """ idx_list_u = [] for b in range(batch_size): for i in range(walk_length): - for j in range(i-window_size, i): + for j in range(i - window_size, i): if j >= 0: idx_list_u += [i + b * walk_length] * negative - for j in range(i+1, i+1+window_size): + for j in range(i + 1, i + 1 + window_size): if j < walk_length: idx_list_u += [i + b * walk_length] * negative - - idx_list_v = list(range(batch_size * walk_length))\ - * negative * window_size * 2 + + idx_list_v = ( + list(range(batch_size * walk_length)) * negative * window_size * 2 + ) 
random.shuffle(idx_list_v) - idx_list_v = idx_list_v[:len(idx_list_u)] + idx_list_v = idx_list_v[: len(idx_list_u)] # [bs * walk_length * negative] index_emb_negu = torch.LongTensor(idx_list_u) @@ -75,42 +78,46 @@ def init_emb2neg_index(walk_length, window_size, negative, batch_size): return index_emb_negu, index_emb_negv + def init_weight(walk_length, window_size, batch_size): - ''' init context weight ''' + """init context weight""" weight = [] for b in range(batch_size): for i in range(walk_length): - for j in range(i-window_size, i): + for j in range(i - window_size, i): if j >= 0: - weight.append(1. - float(i - j - 1)/float(window_size)) + weight.append(1.0 - float(i - j - 1) / float(window_size)) for j in range(i + 1, i + 1 + window_size): if j < walk_length: - weight.append(1. - float(j - i - 1)/float(window_size)) + weight.append(1.0 - float(j - i - 1) / float(window_size)) # [num_pos * batch_size] return torch.Tensor(weight).unsqueeze(1) + def init_empty_grad(emb_dimension, walk_length, batch_size): - """ initialize gradient matrix """ + """initialize gradient matrix""" grad_u = torch.zeros((batch_size * walk_length, emb_dimension)) grad_v = torch.zeros((batch_size * walk_length, emb_dimension)) return grad_u, grad_v + def adam(grad, state_sum, nodes, lr, device, only_gpu): - """ calculate gradients according to adam """ + """calculate gradients according to adam""" grad_sum = (grad * grad).mean(1) if not only_gpu: grad_sum = grad_sum.cpu() - state_sum.index_add_(0, nodes, grad_sum) # cpu + state_sum.index_add_(0, nodes, grad_sum) # cpu std = state_sum[nodes].to(device) # gpu std_values = std.sqrt_().add_(1e-10).unsqueeze(1) - grad = (lr * grad / std_values) # gpu + grad = lr * grad / std_values # gpu return grad + def async_update(num_threads, model, queue): - """ asynchronous embedding update """ + """asynchronous embedding update""" torch.set_num_threads(num_threads) while True: (grad_u, grad_v, grad_v_neg, nodes, neg_nodes) = queue.get() @@ -120,12 +127,17 @@ def async_update(num_threads, model, queue): model.u_embeddings.weight.data.index_add_(0, nodes.view(-1), grad_u) model.v_embeddings.weight.data.index_add_(0, nodes.view(-1), grad_v) if neg_nodes is not None: - model.v_embeddings.weight.data.index_add_(0, neg_nodes.view(-1), grad_v_neg) + model.v_embeddings.weight.data.index_add_( + 0, neg_nodes.view(-1), grad_v_neg + ) + class SkipGramModel(nn.Module): - """ Negative sampling based skip-gram """ - def __init__(self, - emb_size, + """Negative sampling based skip-gram""" + + def __init__( + self, + emb_size, emb_dimension, walk_length, window_size, @@ -143,8 +155,8 @@ def __init__(self, use_context_weight, async_update, num_threads, - ): - """ initialize embedding on CPU + ): + """initialize embedding on CPU Paremeters ---------- @@ -185,16 +197,18 @@ def __init__(self, self.use_context_weight = use_context_weight self.async_update = async_update self.num_threads = num_threads - + # initialize the device as cpu self.device = torch.device("cpu") # content embedding self.u_embeddings = nn.Embedding( - self.emb_size, self.emb_dimension, sparse=True) + self.emb_size, self.emb_dimension, sparse=True + ) # context embedding self.v_embeddings = nn.Embedding( - self.emb_size, self.emb_dimension, sparse=True) + self.emb_size, self.emb_dimension, sparse=True + ) # initialze embedding initrange = 1.0 / self.emb_dimension init.uniform_(self.u_embeddings.weight.data, -initrange, initrange) @@ -202,28 +216,26 @@ def __init__(self, # lookup_table is used for fast sigmoid computing 
self.lookup_table = torch.sigmoid(torch.arange(-6.01, 6.01, 0.01)) - self.lookup_table[0] = 0. - self.lookup_table[-1] = 1. + self.lookup_table[0] = 0.0 + self.lookup_table[-1] = 1.0 if self.record_loss: - self.logsigmoid_table = torch.log(torch.sigmoid(torch.arange(-6.01, 6.01, 0.01))) + self.logsigmoid_table = torch.log( + torch.sigmoid(torch.arange(-6.01, 6.01, 0.01)) + ) self.loss = [] # indexes to select positive/negative node pairs from batch_walks self.index_emb_posu, self.index_emb_posv = init_emb2pos_index( - self.walk_length, - self.window_size, - self.batch_size) + self.walk_length, self.window_size, self.batch_size + ) self.index_emb_negu, self.index_emb_negv = init_emb2neg_index( - self.walk_length, - self.window_size, - self.negative, - self.batch_size) + self.walk_length, self.window_size, self.negative, self.batch_size + ) if self.use_context_weight: self.context_weight = init_weight( - self.walk_length, - self.window_size, - self.batch_size) + self.walk_length, self.window_size, self.batch_size + ) # adam self.state_sum_u = torch.zeros(self.emb_size) @@ -231,32 +243,31 @@ def __init__(self, # gradients of nodes in batch_walks self.grad_u, self.grad_v = init_empty_grad( - self.emb_dimension, - self.walk_length, - self.batch_size) + self.emb_dimension, self.walk_length, self.batch_size + ) def create_async_update(self): - """ Set up the async update subprocess. - """ + """Set up the async update subprocess.""" self.async_q = Queue(1) - self.async_p = mp.Process(target=async_update, args=(self.num_threads, self, self.async_q)) + self.async_p = mp.Process( + target=async_update, args=(self.num_threads, self, self.async_q) + ) self.async_p.start() def finish_async_update(self): - """ Notify the async update subprocess to quit. - """ + """Notify the async update subprocess to quit.""" self.async_q.put((None, None, None, None, None)) self.async_p.join() def share_memory(self): - """ share the parameters across subprocesses """ + """share the parameters across subprocesses""" self.u_embeddings.weight.share_memory_() self.v_embeddings.weight.share_memory_() self.state_sum_u.share_memory_() self.state_sum_v.share_memory_() def set_device(self, gpu_id): - """ set gpu device """ + """set gpu device""" self.device = torch.device("cuda:%d" % gpu_id) print("The device is", self.device) self.lookup_table = self.lookup_table.to(self.device) @@ -272,7 +283,7 @@ def set_device(self, gpu_id): self.context_weight = self.context_weight.to(self.device) def all_to_device(self, gpu_id): - """ move all of the parameters to a single GPU """ + """move all of the parameters to a single GPU""" self.device = torch.device("cuda:%d" % gpu_id) self.set_device(gpu_id) self.u_embeddings = self.u_embeddings.cuda(gpu_id) @@ -281,17 +292,17 @@ def all_to_device(self, gpu_id): self.state_sum_v = self.state_sum_v.to(self.device) def fast_sigmoid(self, score): - """ do fast sigmoid by looking up in a pre-defined table """ + """do fast sigmoid by looking up in a pre-defined table""" idx = torch.floor((score + 6.01) / 0.01).long() return self.lookup_table[idx] def fast_logsigmoid(self, score): - """ do fast logsigmoid by looking up in a pre-defined table """ + """do fast logsigmoid by looking up in a pre-defined table""" idx = torch.floor((score + 6.01) / 0.01).long() return self.logsigmoid_table[idx] def fast_learn(self, batch_walks, neg_nodes=None): - """ Learn a batch of random walks in a fast way. It has the following features: + """Learn a batch of random walks in a fast way. It has the following features: 1. 
It calculating the gradients directly without the forward operation. 2. It does sigmoid by a looking up table. @@ -310,7 +321,7 @@ def fast_learn(self, batch_walks, neg_nodes=None): Usage example ------------- - batch_walks = [torch.LongTensor([1,2,3,4]), + batch_walks = [torch.LongTensor([1,2,3,4]), torch.LongTensor([2,3,4,2])]) lr = 0.01 neg_nodes = None @@ -326,16 +337,23 @@ def fast_learn(self, batch_walks, neg_nodes=None): nodes = nodes.to(self.device) if neg_nodes is not None: neg_nodes = neg_nodes.to(self.device) - emb_u = self.u_embeddings(nodes).view(-1, self.emb_dimension).to(self.device) - emb_v = self.v_embeddings(nodes).view(-1, self.emb_dimension).to(self.device) + emb_u = ( + self.u_embeddings(nodes) + .view(-1, self.emb_dimension) + .to(self.device) + ) + emb_v = ( + self.v_embeddings(nodes) + .view(-1, self.emb_dimension) + .to(self.device) + ) ## Postive bs = len(batch_walks) if bs < self.batch_size: index_emb_posu, index_emb_posv = init_emb2pos_index( - self.walk_length, - self.window_size, - bs) + self.walk_length, self.window_size, bs + ) index_emb_posu = index_emb_posu.to(self.device) index_emb_posv = index_emb_posv.to(self.device) else: @@ -356,8 +374,12 @@ def fast_learn(self, batch_walks, neg_nodes=None): # [batch_size * num_pos, dim] if self.lap_norm > 0: - grad_u_pos = score * emb_pos_v + self.lap_norm * (emb_pos_v - emb_pos_u) - grad_v_pos = score * emb_pos_u + self.lap_norm * (emb_pos_u - emb_pos_v) + grad_u_pos = score * emb_pos_v + self.lap_norm * ( + emb_pos_v - emb_pos_u + ) + grad_v_pos = score * emb_pos_u + self.lap_norm * ( + emb_pos_u - emb_pos_v + ) else: grad_u_pos = score * emb_pos_v grad_v_pos = score * emb_pos_u @@ -365,9 +387,8 @@ def fast_learn(self, batch_walks, neg_nodes=None): if self.use_context_weight: if bs < self.batch_size: context_weight = init_weight( - self.walk_length, - self.window_size, - bs).to(self.device) + self.walk_length, self.window_size, bs + ).to(self.device) else: context_weight = self.context_weight grad_u_pos *= context_weight @@ -376,9 +397,8 @@ def fast_learn(self, batch_walks, neg_nodes=None): # [batch_size * walk_length, dim] if bs < self.batch_size: grad_u, grad_v = init_empty_grad( - self.emb_dimension, - self.walk_length, - bs) + self.emb_dimension, self.walk_length, bs + ) grad_u = grad_u.to(self.device) grad_v = grad_v.to(self.device) else: @@ -394,14 +414,15 @@ def fast_learn(self, batch_walks, neg_nodes=None): ## Negative if bs < self.batch_size: index_emb_negu, index_emb_negv = init_emb2neg_index( - self.walk_length, self.window_size, self.negative, bs) + self.walk_length, self.window_size, self.negative, bs + ) index_emb_negu = index_emb_negu.to(self.device) index_emb_negv = index_emb_negv.to(self.device) else: index_emb_negu = self.index_emb_negu index_emb_negv = self.index_emb_negv emb_neg_u = torch.index_select(emb_u, 0, index_emb_negu) - + if neg_nodes is None: emb_neg_v = torch.index_select(emb_v, 0, index_emb_negv) else: @@ -411,9 +432,13 @@ def fast_learn(self, batch_walks, neg_nodes=None): neg_score = torch.sum(torch.mul(emb_neg_u, emb_neg_v), dim=1) neg_score = torch.clamp(neg_score, max=6, min=-6) # [batch_size * walk_length * negative, 1] - score = - self.fast_sigmoid(neg_score).unsqueeze(1) + score = -self.fast_sigmoid(neg_score).unsqueeze(1) if self.record_loss: - self.loss.append(self.negative * self.neg_weight * torch.mean(self.fast_logsigmoid(-neg_score)).item()) + self.loss.append( + self.negative + * self.neg_weight + * torch.mean(self.fast_logsigmoid(-neg_score)).item() + ) grad_u_neg = 
self.neg_weight * score * emb_neg_v grad_v_neg = self.neg_weight * score * emb_neg_u @@ -426,10 +451,21 @@ def fast_learn(self, batch_walks, neg_nodes=None): nodes = nodes.view(-1) # use adam optimizer - grad_u = adam(grad_u, self.state_sum_u, nodes, lr, self.device, self.only_gpu) - grad_v = adam(grad_v, self.state_sum_v, nodes, lr, self.device, self.only_gpu) + grad_u = adam( + grad_u, self.state_sum_u, nodes, lr, self.device, self.only_gpu + ) + grad_v = adam( + grad_v, self.state_sum_v, nodes, lr, self.device, self.only_gpu + ) if neg_nodes is not None: - grad_v_neg = adam(grad_v_neg, self.state_sum_v, neg_nodes, lr, self.device, self.only_gpu) + grad_v_neg = adam( + grad_v_neg, + self.state_sum_v, + neg_nodes, + lr, + self.device, + self.only_gpu, + ) if self.mixed_train: grad_u = grad_u.cpu() @@ -447,16 +483,18 @@ def fast_learn(self, batch_walks, neg_nodes=None): neg_nodes.share_memory_() grad_v_neg.share_memory_() self.async_q.put((grad_u, grad_v, grad_v_neg, nodes, neg_nodes)) - + if not self.async_update: self.u_embeddings.weight.data.index_add_(0, nodes.view(-1), grad_u) - self.v_embeddings.weight.data.index_add_(0, nodes.view(-1), grad_v) + self.v_embeddings.weight.data.index_add_(0, nodes.view(-1), grad_v) if neg_nodes is not None: - self.v_embeddings.weight.data.index_add_(0, neg_nodes.view(-1), grad_v_neg) + self.v_embeddings.weight.data.index_add_( + 0, neg_nodes.view(-1), grad_v_neg + ) return def forward(self, pos_u, pos_v, neg_v): - ''' Do forward and backward. It is designed for future use. ''' + """Do forward and backward. It is designed for future use.""" emb_u = self.u_embeddings(pos_u) emb_v = self.v_embeddings(pos_v) emb_neg_v = self.v_embeddings(neg_v) @@ -469,11 +507,11 @@ def forward(self, pos_u, pos_v, neg_v): neg_score = torch.clamp(neg_score, max=6, min=-6) neg_score = -torch.sum(F.logsigmoid(-neg_score), dim=1) - #return torch.mean(score + neg_score) + # return torch.mean(score + neg_score) return torch.sum(score), torch.sum(neg_score) def save_embedding(self, dataset, file_name): - """ Write embedding to local file. Only used when node ids are numbers. + """Write embedding to local file. Only used when node ids are numbers. Parameter --------- @@ -482,42 +520,55 @@ def save_embedding(self, dataset, file_name): """ embedding = self.u_embeddings.weight.cpu().data.numpy() if self.norm: - embedding /= np.sqrt(np.sum(embedding * embedding, 1)).reshape(-1, 1) + embedding /= np.sqrt(np.sum(embedding * embedding, 1)).reshape( + -1, 1 + ) np.save(file_name, embedding) def save_embedding_pt(self, dataset, file_name): - """ For ogb leaderboard. 
- """ + """For ogb leaderboard.""" try: max_node_id = max(dataset.node2id.keys()) if max_node_id + 1 != self.emb_size: print("WARNING: The node ids are not serial.") embedding = torch.zeros(max_node_id + 1, self.emb_dimension) - index = torch.LongTensor(list(map(lambda id: dataset.id2node[id], list(range(self.emb_size))))) + index = torch.LongTensor( + list( + map( + lambda id: dataset.id2node[id], + list(range(self.emb_size)), + ) + ) + ) embedding.index_add_(0, index, self.u_embeddings.weight.cpu().data) if self.norm: - embedding /= torch.sqrt(torch.sum(embedding.mul(embedding), 1) + 1e-6).unsqueeze(1) + embedding /= torch.sqrt( + torch.sum(embedding.mul(embedding), 1) + 1e-6 + ).unsqueeze(1) torch.save(embedding, file_name) except: self.save_embedding_pt_dgl_graph(dataset, file_name) def save_embedding_pt_dgl_graph(self, dataset, file_name): - """ For ogb leaderboard """ + """For ogb leaderboard""" embedding = torch.zeros_like(self.u_embeddings.weight.cpu().data) valid_seeds = torch.LongTensor(dataset.valid_seeds) - valid_embedding = self.u_embeddings.weight.cpu().data.index_select(0, - valid_seeds) + valid_embedding = self.u_embeddings.weight.cpu().data.index_select( + 0, valid_seeds + ) embedding.index_add_(0, valid_seeds, valid_embedding) if self.norm: - embedding /= torch.sqrt(torch.sum(embedding.mul(embedding), 1) + 1e-6).unsqueeze(1) + embedding /= torch.sqrt( + torch.sum(embedding.mul(embedding), 1) + 1e-6 + ).unsqueeze(1) torch.save(embedding, file_name) def save_embedding_txt(self, dataset, file_name): - """ Write embedding to local file. For future use. + """Write embedding to local file. For future use. Parameter --------- @@ -526,9 +577,11 @@ def save_embedding_txt(self, dataset, file_name): """ embedding = self.u_embeddings.weight.cpu().data.numpy() if self.norm: - embedding /= np.sqrt(np.sum(embedding * embedding, 1)).reshape(-1, 1) - with open(file_name, 'w') as f: - f.write('%d %d\n' % (self.emb_size, self.emb_dimension)) + embedding /= np.sqrt(np.sum(embedding * embedding, 1)).reshape( + -1, 1 + ) + with open(file_name, "w") as f: + f.write("%d %d\n" % (self.emb_size, self.emb_dimension)) for wid in range(self.emb_size): - e = ' '.join(map(lambda x: str(x), embedding[wid])) - f.write('%s %s\n' % (str(dataset.id2node[wid]), e)) + e = " ".join(map(lambda x: str(x), embedding[wid])) + f.write("%s %s\n" % (str(dataset.id2node[wid]), e)) diff --git a/examples/pytorch/ogb/deepwalk/reading_data.py b/examples/pytorch/ogb/deepwalk/reading_data.py index 7c042b5e1e65..11b326370424 100644 --- a/examples/pytorch/ogb/deepwalk/reading_data.py +++ b/examples/pytorch/ogb/deepwalk/reading_data.py @@ -1,18 +1,25 @@ import os +import pickle +import random +import time + import numpy as np import scipy.sparse as sp -import pickle import torch from torch.utils.data import DataLoader -from dgl.data.utils import download, _get_dgl_url, get_download_dir, extract_archive -import random -import time +from utils import shuffle_walks + import dgl +from dgl.data.utils import ( + _get_dgl_url, + download, + extract_archive, + get_download_dir, +) -from utils import shuffle_walks def ReadTxtNet(file_path="", undirected=True): - """ Read the txt network file. + """Read the txt network file. Notations: The network is unweighted. 
Parameters @@ -23,16 +30,20 @@ def ReadTxtNet(file_path="", undirected=True): Return ------ net dict : a dict recording the connections in the graph - node2id dict : a dict mapping the nodes to their embedding indices + node2id dict : a dict mapping the nodes to their embedding indices id2node dict : a dict mapping nodes embedding indices to the nodes """ - if file_path == 'youtube' or file_path == 'blog': + if file_path == "youtube" or file_path == "blog": name = file_path dir = get_download_dir() - zip_file_path='{}/{}.zip'.format(dir, name) - download(_get_dgl_url(os.path.join('dataset/DeepWalk/', '{}.zip'.format(file_path))), path=zip_file_path) - extract_archive(zip_file_path, - '{}/{}'.format(dir, name)) + zip_file_path = "{}/{}.zip".format(dir, name) + download( + _get_dgl_url( + os.path.join("dataset/DeepWalk/", "{}.zip".format(file_path)) + ), + path=zip_file_path, + ) + extract_archive(zip_file_path, "{}/{}".format(dir, name)) file_path = "{}/{}/{}-net.txt".format(dir, name, name) node2id = {} @@ -46,7 +57,10 @@ def ReadTxtNet(file_path="", undirected=True): with open(file_path, "r") as f: for line in f.readlines(): tup = list(map(int, line.strip().split(" "))) - assert len(tup) in [2, 3], "The format of network file is unrecognizable." + assert len(tup) in [ + 2, + 3, + ], "The format of network file is unrecognizable." if len(tup) == 3: n1, n2, w = tup elif len(tup) == 2: @@ -73,7 +87,7 @@ def ReadTxtNet(file_path="", undirected=True): src.append(n1) dst.append(n2) weight.append(w) - + if undirected: if n2 not in net: net[n2] = {n1: w} @@ -90,16 +104,15 @@ def ReadTxtNet(file_path="", undirected=True): print("edge num: %d" % len(src)) assert max(net.keys()) == len(net) - 1, "error reading net, quit" - sm = sp.coo_matrix( - (np.array(weight), (src, dst)), - dtype=np.float32) + sm = sp.coo_matrix((np.array(weight), (src, dst)), dtype=np.float32) return net, node2id, id2node, sm + def net2graph(net_sm): - """ Transform the network to DGL graph + """Transform the network to DGL graph - Return + Return ------ G DGLGraph : graph by DGL """ @@ -110,30 +123,34 @@ def net2graph(net_sm): print("Building DGLGraph in %.2fs" % t) return G + def make_undirected(G): - #G.readonly(False) + # G.readonly(False) G.add_edges(G.edges()[1], G.edges()[0]) return G + def find_connected_nodes(G): nodes = G.out_degrees().nonzero().squeeze(-1) return nodes + class DeepwalkDataset: - def __init__(self, - net_file, - map_file, - walk_length, - window_size, - num_walks, - batch_size, - negative=5, - gpus=[0], - fast_neg=True, - ogbl_name="", - load_from_ogbl=False, - ): - """ This class has the following functions: + def __init__( + self, + net_file, + map_file, + walk_length, + window_size, + num_walks, + batch_size, + negative=5, + gpus=[0], + fast_neg=True, + ogbl_name="", + load_from_ogbl=False, + ): + """This class has the following functions: 1. Transform the txt network file into DGL graph; 2. Generate random walk sequences for the trainer; 3. Provide the negative table if the user hopes to sample negative @@ -158,8 +175,11 @@ def __init__(self, self.fast_neg = fast_neg if load_from_ogbl: - assert len(gpus) == 1, "ogb.linkproppred is not compatible with multi-gpu training (CUDA error)." + assert ( + len(gpus) == 1 + ), "ogb.linkproppred is not compatible with multi-gpu training (CUDA error)." 
from load_dataset import load_from_ogbl_with_name + self.G = load_from_ogbl_with_name(ogbl_name) self.G = make_undirected(self.G) else: @@ -173,12 +193,18 @@ def __init__(self, start = time.time() self.valid_seeds = find_connected_nodes(self.G) if len(self.valid_seeds) != self.num_nodes: - print("WARNING: The node ids are not serial. Some nodes are invalid.") - + print( + "WARNING: The node ids are not serial. Some nodes are invalid." + ) + seeds = torch.cat([torch.LongTensor(self.valid_seeds)] * num_walks) - self.seeds = torch.split(shuffle_walks(seeds), - int(np.ceil(len(self.valid_seeds) * self.num_walks / self.num_procs)), - 0) + self.seeds = torch.split( + shuffle_walks(seeds), + int( + np.ceil(len(self.valid_seeds) * self.num_walks / self.num_procs) + ), + 0, + ) end = time.time() t = end - start print("%d seeds in %.2fs" % (len(seeds), t)) @@ -190,7 +216,7 @@ def __init__(self, node_degree /= np.sum(node_degree) node_degree = np.array(node_degree * 1e8, dtype=np.int) self.neg_table = [] - + for idx, node in enumerate(self.valid_seeds): self.neg_table += [node] * node_degree[idx] self.neg_table_size = len(self.neg_table) @@ -198,18 +224,19 @@ def __init__(self, del node_degree def create_sampler(self, i): - """ create random walk sampler """ + """create random walk sampler""" return DeepwalkSampler(self.G, self.seeds[i], self.walk_length) def save_mapping(self, map_file): - """ save the mapping dict that maps node IDs to embedding indices """ + """save the mapping dict that maps node IDs to embedding indices""" with open(map_file, "wb") as f: pickle.dump(self.node2id, f) + class DeepwalkSampler(object): def __init__(self, G, seeds, walk_length): - """ random walk sampler - + """random walk sampler + Parameter --------- G dgl.Graph : the input graph @@ -219,7 +246,9 @@ def __init__(self, G, seeds, walk_length): self.G = G self.seeds = seeds self.walk_length = walk_length - + def sample(self, seeds): - walks = dgl.sampling.random_walk(self.G, seeds, length=self.walk_length-1)[0] + walks = dgl.sampling.random_walk( + self.G, seeds, length=self.walk_length - 1 + )[0] return walks diff --git a/examples/pytorch/ogb/deepwalk/utils.py b/examples/pytorch/ogb/deepwalk/utils.py index 31f10000e65f..19ba674e6ef4 100644 --- a/examples/pytorch/ogb/deepwalk/utils.py +++ b/examples/pytorch/ogb/deepwalk/utils.py @@ -1,11 +1,13 @@ import torch + def shuffle_walks(walks): seeds = torch.randperm(walks.size()[0]) return walks[seeds] + def sum_up_params(model): - """ Count the model parameters """ + """Count the model parameters""" n = [] n.append(model.u_embeddings.weight.cpu().data.numel() * 2) n.append(model.lookup_table.cpu().numel()) diff --git a/examples/pytorch/ogb/directional_GSN/main.py b/examples/pytorch/ogb/directional_GSN/main.py index 671f74c56c46..d423bee70ec1 100644 --- a/examples/pytorch/ogb/directional_GSN/main.py +++ b/examples/pytorch/ogb/directional_GSN/main.py @@ -1,33 +1,47 @@ -from ogb.graphproppred import Evaluator -import torch -import numpy as np -from dgl.dataloading import GraphDataLoader -from tqdm import tqdm -import dgl +import argparse import random + +import numpy as np +import torch import torch.nn as nn -from ogb.graphproppred.mol_encoder import AtomEncoder import torch.nn.functional as F import torch.optim as optim -import argparse -from torch.utils.data import Dataset +from ogb.graphproppred import Evaluator +from ogb.graphproppred.mol_encoder import AtomEncoder from preprocessing import prepare_dataset +from torch.utils.data import Dataset +from tqdm import tqdm + 
+import dgl +from dgl.dataloading import GraphDataLoader + def aggregate_mean(h, vector_field, h_in): return torch.mean(h, dim=1) + def aggregate_max(h, vector_field, h_in): return torch.max(h, dim=1)[0] + def aggregate_sum(h, vector_field, h_in): return torch.sum(h, dim=1) + def aggregate_dir_dx(h, vector_field, h_in, eig_idx=1): - eig_w = ((vector_field[:, :, eig_idx]) / - (torch.sum(torch.abs(vector_field[:, :, eig_idx]), keepdim=True, dim=1) + 1e-8)).unsqueeze(-1) + eig_w = ( + (vector_field[:, :, eig_idx]) + / ( + torch.sum( + torch.abs(vector_field[:, :, eig_idx]), keepdim=True, dim=1 + ) + + 1e-8 + ) + ).unsqueeze(-1) h_mod = torch.mul(h, eig_w) return torch.abs(torch.sum(h_mod, dim=1) - torch.sum(eig_w, dim=1) * h_in) + class FCLayer(nn.Module): def __init__(self, in_size, out_size): super(FCLayer, self).__init__() @@ -46,6 +60,7 @@ def forward(self, x): h = self.linear(x) return h + class MLP(nn.Module): def __init__(self, in_size, out_size): super(MLP, self).__init__() @@ -58,6 +73,7 @@ def forward(self, x): x = self.fc(x) return x + class DGNLayer(nn.Module): def __init__(self, in_dim, out_dim, dropout, aggregators): super().__init__() @@ -68,36 +84,47 @@ def __init__(self, in_dim, out_dim, dropout, aggregators): self.batchnorm_h = nn.BatchNorm1d(out_dim) self.pretrans = MLP(in_size=2 * in_dim, out_size=in_dim) - self.posttrans = MLP(in_size=(len(aggregators) * 1 + 1) * in_dim, out_size=out_dim) + self.posttrans = MLP( + in_size=(len(aggregators) * 1 + 1) * in_dim, out_size=out_dim + ) def pretrans_edges(self, edges): - z2 = torch.cat([edges.src['h'], edges.dst['h']], dim=1) - vector_field = edges.data['eig'] - return {'e': self.pretrans(z2), 'vector_field': vector_field} + z2 = torch.cat([edges.src["h"], edges.dst["h"]], dim=1) + vector_field = edges.data["eig"] + return {"e": self.pretrans(z2), "vector_field": vector_field} def message_func(self, edges): - return {'e': edges.data['e'], 'vector_field': edges.data['vector_field']} + return { + "e": edges.data["e"], + "vector_field": edges.data["vector_field"], + } def reduce_func(self, nodes): - h_in = nodes.data['h'] - h = nodes.mailbox['e'] + h_in = nodes.data["h"] + h = nodes.mailbox["e"] - vector_field = nodes.mailbox['vector_field'] + vector_field = nodes.mailbox["vector_field"] - h = torch.cat([aggregate(h, vector_field, h_in) for aggregate in self.aggregators], dim=1) + h = torch.cat( + [ + aggregate(h, vector_field, h_in) + for aggregate in self.aggregators + ], + dim=1, + ) - return {'h': h} + return {"h": h} def forward(self, g, h, snorm_n): - g.ndata['h'] = h + g.ndata["h"] = h # pretransformation g.apply_edges(self.pretrans_edges) # aggregation g.update_all(self.message_func, self.reduce_func) - h = torch.cat([h, g.ndata['h']], dim=1) + h = torch.cat([h, g.ndata["h"]], dim=1) # posttransformation h = self.posttrans(h) @@ -111,12 +138,17 @@ def forward(self, g, h, snorm_n): return h -class MLPReadout(nn.Module): +class MLPReadout(nn.Module): def __init__(self, input_dim, output_dim, L=2): # L=nb_hidden_layers super().__init__() - list_FC_layers = [nn.Linear(input_dim // 2 ** l, input_dim // 2 ** (l + 1), bias=True) for l in range(L)] - list_FC_layers.append(nn.Linear(input_dim // 2 ** L, output_dim, bias=True)) + list_FC_layers = [ + nn.Linear(input_dim // 2**l, input_dim // 2 ** (l + 1), bias=True) + for l in range(L) + ] + list_FC_layers.append( + nn.Linear(input_dim // 2**L, output_dim, bias=True) + ) self.FC_layers = nn.ModuleList(list_FC_layers) self.L = L @@ -128,17 +160,38 @@ def forward(self, x): y = 
self.FC_layers[self.L](y) return y + class DGNNet(nn.Module): def __init__(self, hidden_dim=420, out_dim=420, dropout=0.2, n_layers=4): super().__init__() self.embedding_h = AtomEncoder(emb_dim=hidden_dim) - self.aggregators = [aggregate_mean, aggregate_sum, aggregate_max, aggregate_dir_dx] - - self.layers = nn.ModuleList([DGNLayer(in_dim=hidden_dim, out_dim=hidden_dim, dropout=dropout, - aggregators=self.aggregators) for _ in range(n_layers - 1)]) - self.layers.append(DGNLayer(in_dim=hidden_dim, out_dim=out_dim, dropout=dropout, - aggregators=self.aggregators)) + self.aggregators = [ + aggregate_mean, + aggregate_sum, + aggregate_max, + aggregate_dir_dx, + ] + + self.layers = nn.ModuleList( + [ + DGNLayer( + in_dim=hidden_dim, + out_dim=hidden_dim, + dropout=dropout, + aggregators=self.aggregators, + ) + for _ in range(n_layers - 1) + ] + ) + self.layers.append( + DGNLayer( + in_dim=hidden_dim, + out_dim=out_dim, + dropout=dropout, + aggregators=self.aggregators, + ) + ) # 128 out dim since ogbg-molpcba has 128 tasks self.MLP_layer = MLPReadout(out_dim, 128) @@ -150,32 +203,37 @@ def forward(self, g, h, snorm_n): h_t = conv(g, h, snorm_n) h = h_t - g.ndata['h'] = h + g.ndata["h"] = h - hg = dgl.mean_nodes(g, 'h') + hg = dgl.mean_nodes(g, "h") return self.MLP_layer(hg) def loss(self, scores, labels): is_labeled = labels == labels - loss = nn.BCEWithLogitsLoss()(scores[is_labeled], labels[is_labeled].float()) + loss = nn.BCEWithLogitsLoss()( + scores[is_labeled], labels[is_labeled].float() + ) return loss + def train_epoch(model, optimizer, device, data_loader): model.train() epoch_loss = 0 epoch_train_AP = 0 list_scores = [] list_labels = [] - for iter, (batch_graphs, batch_labels, batch_snorm_n) in enumerate(data_loader): + for iter, (batch_graphs, batch_labels, batch_snorm_n) in enumerate( + data_loader + ): batch_graphs = batch_graphs.to(device) - batch_x = batch_graphs.ndata['feat'] # num x feat + batch_x = batch_graphs.ndata["feat"] # num x feat batch_snorm_n = batch_snorm_n.to(device) batch_labels = batch_labels.to(device) optimizer.zero_grad() batch_scores = model(batch_graphs, batch_x, batch_snorm_n) - + loss = model.loss(batch_scores, batch_labels) loss.backward() optimizer.step() @@ -183,14 +241,16 @@ def train_epoch(model, optimizer, device, data_loader): list_scores.append(batch_scores) list_labels.append(batch_labels) - epoch_loss /= (iter + 1) + epoch_loss /= iter + 1 - evaluator = Evaluator(name='ogbg-molpcba') - epoch_train_AP = evaluator.eval({'y_pred': torch.cat(list_scores), - 'y_true': torch.cat(list_labels)})['ap'] + evaluator = Evaluator(name="ogbg-molpcba") + epoch_train_AP = evaluator.eval( + {"y_pred": torch.cat(list_scores), "y_true": torch.cat(list_labels)} + )["ap"] return epoch_loss, epoch_train_AP + def evaluate_network(model, device, data_loader): model.eval() epoch_test_loss = 0 @@ -198,9 +258,11 @@ def evaluate_network(model, device, data_loader): with torch.no_grad(): list_scores = [] list_labels = [] - for iter, (batch_graphs, batch_labels, batch_snorm_n) in enumerate(data_loader): + for iter, (batch_graphs, batch_labels, batch_snorm_n) in enumerate( + data_loader + ): batch_graphs = batch_graphs.to(device) - batch_x = batch_graphs.ndata['feat'] + batch_x = batch_graphs.ndata["feat"] batch_snorm_n = batch_snorm_n.to(device) batch_labels = batch_labels.to(device) @@ -211,14 +273,16 @@ def evaluate_network(model, device, data_loader): list_scores.append(batch_scores) list_labels.append(batch_labels) - epoch_test_loss /= (iter + 1) + epoch_test_loss /= iter 
+ 1 - evaluator = Evaluator(name='ogbg-molpcba') - epoch_test_AP = evaluator.eval({'y_pred': torch.cat(list_scores), - 'y_true': torch.cat(list_labels)})['ap'] + evaluator = Evaluator(name="ogbg-molpcba") + epoch_test_AP = evaluator.eval( + {"y_pred": torch.cat(list_scores), "y_true": torch.cat(list_labels)} + )["ap"] return epoch_test_loss, epoch_test_AP + def train(dataset, params): trainset, valset, testset = dataset.train, dataset.val, dataset.test @@ -236,27 +300,48 @@ def train(dataset, params): print("MODEL DETAILS:\n") for param in model.parameters(): total_param += np.prod(list(param.data.size())) - print('DGN Total parameters:', total_param) + print("DGN Total parameters:", total_param) optimizer = optim.Adam(model.parameters(), lr=0.0008, weight_decay=1e-5) - scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', - factor=0.8, - patience=8, - verbose=True) + scheduler = optim.lr_scheduler.ReduceLROnPlateau( + optimizer, mode="min", factor=0.8, patience=8, verbose=True + ) epoch_train_losses, epoch_val_losses = [], [] epoch_train_APs, epoch_val_APs, epoch_test_APs = [], [], [] - train_loader = GraphDataLoader(trainset, batch_size=params.batch_size, shuffle=True, collate_fn=dataset.collate, pin_memory=True) - val_loader = GraphDataLoader(valset, batch_size=params.batch_size, shuffle=False, collate_fn=dataset.collate, pin_memory=True) - test_loader = GraphDataLoader(testset, batch_size=params.batch_size, shuffle=False, collate_fn=dataset.collate, pin_memory=True) - - with tqdm(range(450), unit='epoch') as t: + train_loader = GraphDataLoader( + trainset, + batch_size=params.batch_size, + shuffle=True, + collate_fn=dataset.collate, + pin_memory=True, + ) + val_loader = GraphDataLoader( + valset, + batch_size=params.batch_size, + shuffle=False, + collate_fn=dataset.collate, + pin_memory=True, + ) + test_loader = GraphDataLoader( + testset, + batch_size=params.batch_size, + shuffle=False, + collate_fn=dataset.collate, + pin_memory=True, + ) + + with tqdm(range(450), unit="epoch") as t: for epoch in t: - t.set_description('Epoch %d' % epoch) + t.set_description("Epoch %d" % epoch) - epoch_train_loss, epoch_train_ap = train_epoch(model, optimizer, device, train_loader) - epoch_val_loss, epoch_val_ap = evaluate_network(model, device, val_loader) + epoch_train_loss, epoch_train_ap = train_epoch( + model, optimizer, device, train_loader + ) + epoch_val_loss, epoch_val_ap = evaluate_network( + model, device, val_loader + ) epoch_train_losses.append(epoch_train_loss) epoch_val_losses.append(epoch_val_loss) @@ -267,17 +352,20 @@ def train(dataset, params): epoch_test_APs.append(epoch_test_ap.item()) - t.set_postfix(train_loss=epoch_train_loss, - train_AP=epoch_train_ap.item(), val_AP=epoch_val_ap.item(), - refresh=False) + t.set_postfix( + train_loss=epoch_train_loss, + train_AP=epoch_train_ap.item(), + val_AP=epoch_val_ap.item(), + refresh=False, + ) scheduler.step(-epoch_val_ap.item()) - if optimizer.param_groups[0]['lr'] < 1e-5: + if optimizer.param_groups[0]["lr"] < 1e-5: print("\n!! 
LR EQUAL TO MIN LR SET.") break - print('') + print("") best_val_epoch = np.argmax(np.array(epoch_val_APs)) best_train_epoch = np.argmax(np.array(epoch_train_APs)) @@ -291,6 +379,7 @@ def train(dataset, params): print("Test AP of Best Val: {:.4f}".format(best_val_test_ap)) print("Train AP of Best Val: {:.4f}".format(best_val_train_ap)) + class Subset(object): def __init__(self, dataset, labels, indices): dataset = [dataset[idx] for idx in indices] @@ -308,23 +397,35 @@ def __getitem__(self, item): def __len__(self): return self.len + class PCBADataset(Dataset): def __init__(self, name): print("[I] Loading dataset %s..." % (name)) self.name = name - + self.dataset, self.split_idx = prepare_dataset(name) - print("One hot encoding substructure counts... ", end='') - self.d_id = [1]*self.dataset[0].edata['subgraph_counts'].shape[1] + print("One hot encoding substructure counts... ", end="") + self.d_id = [1] * self.dataset[0].edata["subgraph_counts"].shape[1] for g in self.dataset: - g.edata['eig'] = g.edata['subgraph_counts'].float() - - self.train = Subset(self.dataset, self.split_idx['label'], self.split_idx['train']) - self.val = Subset(self.dataset, self.split_idx['label'], self.split_idx['valid']) - self.test = Subset(self.dataset, self.split_idx['label'], self.split_idx['test']) - - print('train, test, val sizes :', len(self.train), len(self.test), len(self.val)) + g.edata["eig"] = g.edata["subgraph_counts"].float() + + self.train = Subset( + self.dataset, self.split_idx["label"], self.split_idx["train"] + ) + self.val = Subset( + self.dataset, self.split_idx["label"], self.split_idx["valid"] + ) + self.test = Subset( + self.dataset, self.split_idx["label"], self.split_idx["test"] + ) + + print( + "train, test, val sizes :", + len(self.train), + len(self.test), + len(self.val), + ) print("[I] Finished loading.") # form a mini batch from a given list of samples = [(graph, label) pairs] @@ -334,22 +435,36 @@ def collate(self, samples): labels = torch.stack(labels) tab_sizes_n = [g.num_nodes() for g in graphs] - tab_snorm_n = [torch.FloatTensor(size, 1).fill_(1./size) for size in tab_sizes_n] + tab_snorm_n = [ + torch.FloatTensor(size, 1).fill_(1.0 / size) for size in tab_sizes_n + ] snorm_n = torch.cat(tab_snorm_n).sqrt() batched_graph = dgl.batch(graphs) return batched_graph, labels, snorm_n -if __name__ == '__main__': + +if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument('--gpu_id', default=0, type=int, help="Please give a value for gpu id") - parser.add_argument('--seed', default=41, type=int, help="Please give a value for seed") - parser.add_argument('--batch_size', default=2048, type=int, help="Please give a value for batch_size") + parser.add_argument( + "--gpu_id", default=0, type=int, help="Please give a value for gpu id" + ) + parser.add_argument( + "--seed", default=41, type=int, help="Please give a value for seed" + ) + parser.add_argument( + "--batch_size", + default=2048, + type=int, + help="Please give a value for batch_size", + ) args = parser.parse_args() # device - args.device = torch.device("cuda:{}".format(args.gpu_id) if torch.cuda.is_available() else "cpu") - + args.device = torch.device( + "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() else "cpu" + ) + # setting seeds random.seed(args.seed) np.random.seed(args.seed) @@ -358,4 +473,4 @@ def collate(self, samples): torch.cuda.manual_seed(args.seed) dataset = PCBADataset("ogbg-molpcba") - train(dataset, args) \ No newline at end of file + train(dataset, args) diff --git 
a/examples/pytorch/ogb/directional_GSN/preprocessing.py b/examples/pytorch/ogb/directional_GSN/preprocessing.py index b39559a83a93..aa1208d46be6 100644 --- a/examples/pytorch/ogb/directional_GSN/preprocessing.py +++ b/examples/pytorch/ogb/directional_GSN/preprocessing.py @@ -1,50 +1,61 @@ -from ogb.graphproppred import DglGraphPropPredDataset -import torch -import numpy as np -import networkx as nx +import os + import graph_tool as gt import graph_tool.topology as gt_topology +import networkx as nx +import numpy as np +import torch +from ogb.graphproppred import DglGraphPropPredDataset from tqdm import tqdm -import os -from dgl.data.utils import save_graphs, load_graphs + +from dgl.data.utils import load_graphs, save_graphs + def to_undirected(edge_index): - row, col = edge_index.transpose(1,0) + row, col = edge_index.transpose(1, 0) row, col = torch.cat([row, col], dim=0), torch.cat([col, row], dim=0) edge_index = torch.stack([row, col], dim=0) - return edge_index.transpose(1,0).tolist() + return edge_index.transpose(1, 0).tolist() + def induced_edge_automorphism_orbits(edge_list): - - ##### node automorphism orbits ##### + + ##### node automorphism orbits ##### graph = gt.Graph(directed=False) graph.add_edge_list(edge_list) gt.stats.remove_self_loops(graph) gt.stats.remove_parallel_edges(graph) # compute the node automorphism group - aut_group = gt_topology.subgraph_isomorphism(graph, graph, induced=False, subgraph=True, generator=False) + aut_group = gt_topology.subgraph_isomorphism( + graph, graph, induced=False, subgraph=True, generator=False + ) orbit_membership = {} for v in graph.get_vertices(): orbit_membership[v] = v - + # whenever two nodes can be mapped via some automorphism, they are assigned the same orbit for aut in aut_group: for original, node in enumerate(aut): role = min(original, orbit_membership[node]) orbit_membership[node] = role - - orbit_membership_list = [[],[]] + + orbit_membership_list = [[], []] for node, om_curr in orbit_membership.items(): orbit_membership_list[0].append(node) orbit_membership_list[1].append(om_curr) # make orbit list contiguous (i.e. 
0,1,2,...O) - _, contiguous_orbit_membership = np.unique(orbit_membership_list[1], return_inverse = True) + _, contiguous_orbit_membership = np.unique( + orbit_membership_list[1], return_inverse=True + ) - orbit_membership = {node: contiguous_orbit_membership[i] for i,node in enumerate(orbit_membership_list[0])} + orbit_membership = { + node: contiguous_orbit_membership[i] + for i, node in enumerate(orbit_membership_list[0]) + } aut_count = len(aut_group) @@ -53,12 +64,14 @@ def induced_edge_automorphism_orbits(edge_list): edge_orbit_membership = dict() edge_orbits2inds = dict() ind = 0 - + edge_list = to_undirected(torch.tensor(graph.get_edges())) # infer edge automorphisms from the node automorphisms - for i,edge in enumerate(edge_list): - edge_orbit = frozenset([orbit_membership[edge[0]], orbit_membership[edge[1]]]) + for i, edge in enumerate(edge_list): + edge_orbit = frozenset( + [orbit_membership[edge[0]], orbit_membership[edge[1]]] + ) if edge_orbit not in edge_orbits2inds: edge_orbits2inds[edge_orbit] = ind ind_edge_orbit = ind @@ -69,78 +82,97 @@ def induced_edge_automorphism_orbits(edge_list): if ind_edge_orbit not in edge_orbit_partition: edge_orbit_partition[ind_edge_orbit] = [tuple(edge)] else: - edge_orbit_partition[ind_edge_orbit] += [tuple(edge)] + edge_orbit_partition[ind_edge_orbit] += [tuple(edge)] edge_orbit_membership[i] = ind_edge_orbit - print('Edge orbit partition of given substructure: {}'.format(edge_orbit_partition)) - print('Number of edge orbits: {}'.format(len(edge_orbit_partition))) - print('Graph (node) automorphism count: {}'.format(aut_count)) - + print( + "Edge orbit partition of given substructure: {}".format( + edge_orbit_partition + ) + ) + print("Number of edge orbits: {}".format(len(edge_orbit_partition))) + print("Graph (node) automorphism count: {}".format(aut_count)) + return graph, edge_orbit_partition, edge_orbit_membership, aut_count + def subgraph_isomorphism_edge_counts(edge_index, subgraph_dict): - + ##### edge structural identifiers ##### - - edge_index = edge_index.transpose(1,0).cpu().numpy() + + edge_index = edge_index.transpose(1, 0).cpu().numpy() edge_dict = {} - for i, edge in enumerate(edge_index): + for i, edge in enumerate(edge_index): edge_dict[tuple(edge)] = i - - subgraph_edges = to_undirected(torch.tensor(subgraph_dict['subgraph'].get_edges().tolist())) + + subgraph_edges = to_undirected( + torch.tensor(subgraph_dict["subgraph"].get_edges().tolist()) + ) G_gt = gt.Graph(directed=False) G_gt.add_edge_list(list(edge_index)) gt.stats.remove_self_loops(G_gt) - gt.stats.remove_parallel_edges(G_gt) - + gt.stats.remove_parallel_edges(G_gt) + # compute all subgraph isomorphisms - sub_iso = gt_topology.subgraph_isomorphism(subgraph_dict['subgraph'], G_gt, induced=True, subgraph=True, generator=True) - - counts = np.zeros((edge_index.shape[0], len(subgraph_dict['orbit_partition']))) - + sub_iso = gt_topology.subgraph_isomorphism( + subgraph_dict["subgraph"], + G_gt, + induced=True, + subgraph=True, + generator=True, + ) + + counts = np.zeros( + (edge_index.shape[0], len(subgraph_dict["orbit_partition"])) + ) + for sub_iso_curr in sub_iso: mapping = sub_iso_curr.get_array() - for i,edge in enumerate(subgraph_edges): - + for i, edge in enumerate(subgraph_edges): + # for every edge in the graph H, find the edge in the subgraph G_S to which it is mapped - # (by finding where its endpoints are matched). + # (by finding where its endpoints are matched). # Then, increase the count of the matched edge w.r.t. 
the corresponding orbit # Repeat for the reverse edge (the one with the opposite direction) - - edge_orbit = subgraph_dict['orbit_membership'][i] + + edge_orbit = subgraph_dict["orbit_membership"][i] mapped_edge = tuple([mapping[edge[0]], mapping[edge[1]]]) counts[edge_dict[mapped_edge], edge_orbit] += 1 - - counts = counts/subgraph_dict['aut_count'] - + + counts = counts / subgraph_dict["aut_count"] + counts = torch.tensor(counts) - + return counts + def prepare_dataset(name): - + # maximum size of cycle graph k = 8 - path = os.path.join('./', 'dataset', name) - data_folder = os.path.join(path, 'processed') + path = os.path.join("./", "dataset", name) + data_folder = os.path.join(path, "processed") os.makedirs(data_folder, exist_ok=True) - - data_file = os.path.join(data_folder, 'cycle_graph_induced_{}.bin'.format(k)) - + + data_file = os.path.join( + data_folder, "cycle_graph_induced_{}.bin".format(k) + ) + # try to load if os.path.exists(data_file): # load print("Loading dataset from {}".format(data_file)) g_list, split_idx = load_graphs(data_file) - else: # generate + else: # generate g_list, split_idx = generate_dataset(path, name) print("Saving dataset to {}".format(data_file)) save_graphs(data_file, g_list, split_idx) return g_list, split_idx + def generate_dataset(path, name): ### compute the orbits of each substructure in the list, as well as the node automorphism count @@ -152,14 +184,25 @@ def generate_dataset(path, name): edge_lists.append(list(graphs_nx.edges)) for edge_list in edge_lists: - subgraph, orbit_partition, orbit_membership, aut_count = induced_edge_automorphism_orbits(edge_list=edge_list) - subgraph_dicts.append({'subgraph':subgraph, 'orbit_partition': orbit_partition, - 'orbit_membership': orbit_membership, 'aut_count': aut_count}) - + ( + subgraph, + orbit_partition, + orbit_membership, + aut_count, + ) = induced_edge_automorphism_orbits(edge_list=edge_list) + subgraph_dicts.append( + { + "subgraph": subgraph, + "orbit_partition": orbit_partition, + "orbit_membership": orbit_membership, + "aut_count": aut_count, + } + ) + ### load and preprocess dataset dataset = DglGraphPropPredDataset(name=name, root=path) split_idx = dataset.get_idx_split() - + # computation of subgraph isomorphisms & creation of data structure graphs_dgl = list() split_idx["label"] = [] @@ -172,19 +215,25 @@ def generate_dataset(path, name): split_idx["label"] = torch.stack(split_idx["label"]) return graphs_dgl, split_idx - + + def _prepare(g, subgraph_dicts): edge_index = torch.stack(g.edges()) - + identifiers = None for subgraph_dict in subgraph_dicts: counts = subgraph_isomorphism_edge_counts(edge_index, subgraph_dict) - identifiers = counts if identifiers is None else torch.cat((identifiers, counts),1) + identifiers = ( + counts + if identifiers is None + else torch.cat((identifiers, counts), 1) + ) + + g.edata["subgraph_counts"] = identifiers.long() - g.edata['subgraph_counts'] = identifiers.long() - return g -if __name__ == '__main__': - prepare_dataset("ogbg-molpcba") \ No newline at end of file + +if __name__ == "__main__": + prepare_dataset("ogbg-molpcba") diff --git a/examples/pytorch/ogb/line/line.py b/examples/pytorch/ogb/line/line.py index c1beaf708d15..b171a72a52d6 100644 --- a/examples/pytorch/ogb/line/line.py +++ b/examples/pytorch/ogb/line/line.py @@ -1,20 +1,22 @@ -import torch import argparse -import dgl -import torch.multiprocessing as mp -from torch.utils.data import DataLoader import os import random import time -import numpy as np -from reading_data import LineDataset 
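To make the control flow of the preprocessing above easier to follow: prepare_dataset caps the cycle size at k = 8, generate_dataset turns each substructure edge list into a subgraph dictionary via induced_edge_automorphism_orbits, and _prepare later concatenates one block of counts per substructure into g.edata["subgraph_counts"]. A minimal sketch of that outer loop is given below; the hunk does not show how graphs_nx is built, so the use of networkx.cycle_graph over sizes 3..k is an assumption.

import networkx as nx

k = 8  # maximum cycle size, as set in prepare_dataset above
edge_lists = []
for n in range(3, k + 1):  # assumed minimum cycle size
    edge_lists.append(list(nx.cycle_graph(n).edges))

subgraph_dicts = []
for edge_list in edge_lists:
    # induced_edge_automorphism_orbits is defined earlier in preprocessing.py
    (
        subgraph,
        orbit_partition,
        orbit_membership,
        aut_count,
    ) = induced_edge_automorphism_orbits(edge_list=edge_list)
    subgraph_dicts.append(
        {
            "subgraph": subgraph,
            "orbit_partition": orbit_partition,
            "orbit_membership": orbit_membership,
            "aut_count": aut_count,
        }
    )
# _prepare(g, subgraph_dicts) then stores the concatenated per-orbit edge
# counts as g.edata["subgraph_counts"].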
+import numpy as np +import torch +import torch.multiprocessing as mp from model import SkipGramModel -from utils import sum_up_params, check_args +from reading_data import LineDataset +from torch.utils.data import DataLoader +from utils import check_args, sum_up_params + +import dgl + class LineTrainer: def __init__(self, args): - """ Initializing the trainer with the input arguments """ + """Initializing the trainer with the input arguments""" self.args = args self.dataset = LineDataset( net_file=args.data_file, @@ -27,20 +29,22 @@ def __init__(self, args): ogbn_name=args.ogbn_name, load_from_ogbn=args.load_from_ogbn, num_samples=args.num_samples * 1000000, - ) + ) self.emb_size = self.dataset.G.number_of_nodes() self.emb_model = None def init_device_emb(self): - """ set the device before training + """set the device before training will be called once in fast_train_mp / fast_train """ choices = sum([self.args.only_gpu, self.args.only_cpu, self.args.mix]) - assert choices == 1, "Must choose only *one* training mode in [only_cpu, only_gpu, mix]" - + assert ( + choices == 1 + ), "Must choose only *one* training mode in [only_cpu, only_gpu, mix]" + # initializing embedding on CPU self.emb_model = SkipGramModel( - emb_size=self.emb_size, + emb_size=self.emb_size, emb_dimension=self.args.dim, batch_size=self.args.batch_size, only_cpu=self.args.only_cpu, @@ -56,8 +60,8 @@ def init_device_emb(self): record_loss=self.args.print_loss, async_update=self.args.async_update, num_threads=self.args.num_threads, - ) - + ) + torch.set_num_threads(self.args.num_threads) if self.args.only_gpu: print("Run in 1 GPU") @@ -66,20 +70,22 @@ def init_device_emb(self): elif self.args.mix: print("Mix CPU with %d GPU" % len(self.args.gpus)) if len(self.args.gpus) == 1: - assert self.args.gpus[0] >= 0, 'mix CPU with GPU should have avaliable GPU' + assert ( + self.args.gpus[0] >= 0 + ), "mix CPU with GPU should have avaliable GPU" self.emb_model.set_device(self.args.gpus[0]) else: print("Run in CPU process") - + def train(self): - """ train the embedding """ + """train the embedding""" if len(self.args.gpus) > 1: self.fast_train_mp() else: self.fast_train() def fast_train_mp(self): - """ multi-cpu-core or mix cpu & multi-gpu """ + """multi-cpu-core or mix cpu & multi-gpu""" self.init_device_emb() self.emb_model.share_memory() @@ -89,24 +95,30 @@ def fast_train_mp(self): ps = [] for i in range(len(self.args.gpus)): - p = mp.Process(target=self.fast_train_sp, args=(i, self.args.gpus[i])) + p = mp.Process( + target=self.fast_train_sp, args=(i, self.args.gpus[i]) + ) ps.append(p) p.start() for p in ps: p.join() - - print("Used time: %.2fs" % (time.time()-start_all)) + + print("Used time: %.2fs" % (time.time() - start_all)) if self.args.save_in_pt: - self.emb_model.save_embedding_pt(self.dataset, self.args.output_emb_file) + self.emb_model.save_embedding_pt( + self.dataset, self.args.output_emb_file + ) else: - self.emb_model.save_embedding(self.dataset, self.args.output_emb_file) + self.emb_model.save_embedding( + self.dataset, self.args.output_emb_file + ) def fast_train_sp(self, rank, gpu_id): - """ a subprocess for fast_train_mp """ + """a subprocess for fast_train_mp""" if self.args.mix: self.emb_model.set_device(gpu_id) - + torch.set_num_threads(self.args.num_threads) if self.args.async_update: self.emb_model.create_async_update() @@ -120,9 +132,12 @@ def fast_train_sp(self, rank, gpu_id): shuffle=False, drop_last=False, num_workers=self.args.num_sampler_threads, - ) + ) num_batches = len(dataloader) - print("num 
batchs: %d in process [%d] GPU [%d]" % (num_batches, rank, gpu_id)) + print( + "num batchs: %d in process [%d] GPU [%d]" + % (num_batches, rank, gpu_id) + ) start = time.time() with torch.no_grad(): @@ -133,35 +148,65 @@ def fast_train_sp(self, rank, gpu_id): # do negative sampling bs = edges.size()[0] neg_nodes = torch.LongTensor( - np.random.choice(self.dataset.neg_table, - bs * self.args.negative, - replace=True)) + np.random.choice( + self.dataset.neg_table, + bs * self.args.negative, + replace=True, + ) + ) self.emb_model.fast_learn(edges, neg_nodes=neg_nodes) if i > 0 and i % self.args.print_interval == 0: if self.args.print_loss: if self.args.only_fst: - print("GPU-[%d] batch %d time: %.2fs fst-loss: %.4f" \ - % (gpu_id, i, time.time()-start, -sum(self.emb_model.loss_fst)/self.args.print_interval)) + print( + "GPU-[%d] batch %d time: %.2fs fst-loss: %.4f" + % ( + gpu_id, + i, + time.time() - start, + -sum(self.emb_model.loss_fst) + / self.args.print_interval, + ) + ) elif self.args.only_snd: - print("GPU-[%d] batch %d time: %.2fs snd-loss: %.4f" \ - % (gpu_id, i, time.time()-start, -sum(self.emb_model.loss_snd)/self.args.print_interval)) + print( + "GPU-[%d] batch %d time: %.2fs snd-loss: %.4f" + % ( + gpu_id, + i, + time.time() - start, + -sum(self.emb_model.loss_snd) + / self.args.print_interval, + ) + ) else: - print("GPU-[%d] batch %d time: %.2fs fst-loss: %.4f snd-loss: %.4f" \ - % (gpu_id, i, time.time()-start, \ - -sum(self.emb_model.loss_fst)/self.args.print_interval, \ - -sum(self.emb_model.loss_snd)/self.args.print_interval)) + print( + "GPU-[%d] batch %d time: %.2fs fst-loss: %.4f snd-loss: %.4f" + % ( + gpu_id, + i, + time.time() - start, + -sum(self.emb_model.loss_fst) + / self.args.print_interval, + -sum(self.emb_model.loss_snd) + / self.args.print_interval, + ) + ) self.emb_model.loss_fst = [] self.emb_model.loss_snd = [] else: - print("GPU-[%d] batch %d time: %.2fs" % (gpu_id, i, time.time()-start)) + print( + "GPU-[%d] batch %d time: %.2fs" + % (gpu_id, i, time.time() - start) + ) start = time.time() if self.args.async_update: self.emb_model.finish_async_update() def fast_train(self): - """ fast train with dataloader with only gpu / only cpu""" + """fast train with dataloader with only gpu / only cpu""" self.init_device_emb() if self.args.async_update: @@ -179,8 +224,8 @@ def fast_train(self): shuffle=False, drop_last=False, num_workers=self.args.num_sampler_threads, - ) - + ) + num_batches = len(dataloader) print("num batchs: %d\n" % num_batches) @@ -194,105 +239,220 @@ def fast_train(self): # do negative sampling bs = edges.size()[0] neg_nodes = torch.LongTensor( - np.random.choice(self.dataset.neg_table, - bs * self.args.negative, - replace=True)) + np.random.choice( + self.dataset.neg_table, + bs * self.args.negative, + replace=True, + ) + ) self.emb_model.fast_learn(edges, neg_nodes=neg_nodes) if i > 0 and i % self.args.print_interval == 0: if self.args.print_loss: if self.args.only_fst: - print("Batch %d time: %.2fs fst-loss: %.4f" \ - % (i, time.time()-start, -sum(self.emb_model.loss_fst)/self.args.print_interval)) + print( + "Batch %d time: %.2fs fst-loss: %.4f" + % ( + i, + time.time() - start, + -sum(self.emb_model.loss_fst) + / self.args.print_interval, + ) + ) elif self.args.only_snd: - print("Batch %d time: %.2fs snd-loss: %.4f" \ - % (i, time.time()-start, -sum(self.emb_model.loss_snd)/self.args.print_interval)) + print( + "Batch %d time: %.2fs snd-loss: %.4f" + % ( + i, + time.time() - start, + -sum(self.emb_model.loss_snd) + / 
self.args.print_interval, + ) + ) else: - print("Batch %d time: %.2fs fst-loss: %.4f snd-loss: %.4f" \ - % (i, time.time()-start, \ - -sum(self.emb_model.loss_fst)/self.args.print_interval, \ - -sum(self.emb_model.loss_snd)/self.args.print_interval)) + print( + "Batch %d time: %.2fs fst-loss: %.4f snd-loss: %.4f" + % ( + i, + time.time() - start, + -sum(self.emb_model.loss_fst) + / self.args.print_interval, + -sum(self.emb_model.loss_snd) + / self.args.print_interval, + ) + ) self.emb_model.loss_fst = [] self.emb_model.loss_snd = [] else: - print("Batch %d, training time: %.2fs" % (i, time.time()-start)) + print( + "Batch %d, training time: %.2fs" + % (i, time.time() - start) + ) start = time.time() if self.args.async_update: self.emb_model.finish_async_update() - print("Training used time: %.2fs" % (time.time()-start_all)) + print("Training used time: %.2fs" % (time.time() - start_all)) if self.args.save_in_pt: - self.emb_model.save_embedding_pt(self.dataset, self.args.output_emb_file) + self.emb_model.save_embedding_pt( + self.dataset, self.args.output_emb_file + ) else: - self.emb_model.save_embedding(self.dataset, self.args.output_emb_file) + self.emb_model.save_embedding( + self.dataset, self.args.output_emb_file + ) -if __name__ == '__main__': + +if __name__ == "__main__": parser = argparse.ArgumentParser(description="Implementation of LINE.") # input files ## personal datasets - parser.add_argument('--data_file', type=str, - help="path of dgl graphs") + parser.add_argument("--data_file", type=str, help="path of dgl graphs") ## ogbl datasets - parser.add_argument('--ogbl_name', type=str, - help="name of ogbl dataset, e.g. ogbl-ddi") - parser.add_argument('--load_from_ogbl', default=False, action="store_true", - help="whether load dataset from ogbl") - parser.add_argument('--ogbn_name', type=str, - help="name of ogbn dataset, e.g. ogbn-proteins") - parser.add_argument('--load_from_ogbn', default=False, action="store_true", - help="whether load dataset from ogbn") + parser.add_argument( + "--ogbl_name", type=str, help="name of ogbl dataset, e.g. ogbl-ddi" + ) + parser.add_argument( + "--load_from_ogbl", + default=False, + action="store_true", + help="whether load dataset from ogbl", + ) + parser.add_argument( + "--ogbn_name", type=str, help="name of ogbn dataset, e.g. 
ogbn-proteins" + ) + parser.add_argument( + "--load_from_ogbn", + default=False, + action="store_true", + help="whether load dataset from ogbn", + ) # output files - parser.add_argument('--save_in_pt', default=False, action="store_true", - help='Whether save dat in pt format or npy') - parser.add_argument('--output_emb_file', type=str, default="emb.npy", - help='path of the output npy embedding file') + parser.add_argument( + "--save_in_pt", + default=False, + action="store_true", + help="Whether save dat in pt format or npy", + ) + parser.add_argument( + "--output_emb_file", + type=str, + default="emb.npy", + help="path of the output npy embedding file", + ) # model parameters - parser.add_argument('--dim', default=128, type=int, - help="embedding dimensions") - parser.add_argument('--num_samples', default=1, type=int, - help="number of samples during training (million)") - parser.add_argument('--negative', default=1, type=int, - help="negative samples for each positve node pair") - parser.add_argument('--batch_size', default=128, type=int, - help="number of edges in each batch") - parser.add_argument('--neg_weight', default=1., type=float, - help="negative weight") - parser.add_argument('--lap_norm', default=0.01, type=float, - help="weight of laplacian normalization") - + parser.add_argument( + "--dim", default=128, type=int, help="embedding dimensions" + ) + parser.add_argument( + "--num_samples", + default=1, + type=int, + help="number of samples during training (million)", + ) + parser.add_argument( + "--negative", + default=1, + type=int, + help="negative samples for each positve node pair", + ) + parser.add_argument( + "--batch_size", + default=128, + type=int, + help="number of edges in each batch", + ) + parser.add_argument( + "--neg_weight", default=1.0, type=float, help="negative weight" + ) + parser.add_argument( + "--lap_norm", + default=0.01, + type=float, + help="weight of laplacian normalization", + ) + # training parameters - parser.add_argument('--only_fst', default=False, action="store_true", - help="only do first-order proximity embedding") - parser.add_argument('--only_snd', default=False, action="store_true", - help="only do second-order proximity embedding") - parser.add_argument('--print_interval', default=100, type=int, - help="number of batches between printing") - parser.add_argument('--print_loss', default=False, action="store_true", - help="whether print loss during training") - parser.add_argument('--lr', default=0.2, type=float, - help="learning rate") - + parser.add_argument( + "--only_fst", + default=False, + action="store_true", + help="only do first-order proximity embedding", + ) + parser.add_argument( + "--only_snd", + default=False, + action="store_true", + help="only do second-order proximity embedding", + ) + parser.add_argument( + "--print_interval", + default=100, + type=int, + help="number of batches between printing", + ) + parser.add_argument( + "--print_loss", + default=False, + action="store_true", + help="whether print loss during training", + ) + parser.add_argument("--lr", default=0.2, type=float, help="learning rate") + # optimization settings - parser.add_argument('--mix', default=False, action="store_true", - help="mixed training with CPU and GPU") - parser.add_argument('--gpus', type=int, default=[-1], nargs='+', - help='a list of active gpu ids, e.g. 
0, used with --mix') - parser.add_argument('--only_cpu', default=False, action="store_true", - help="training with CPU") - parser.add_argument('--only_gpu', default=False, action="store_true", - help="training with a single GPU (all of the parameters are moved on the GPU)") - parser.add_argument('--async_update', default=False, action="store_true", - help="mixed training asynchronously, recommend not to use this") - - parser.add_argument('--fast_neg', default=False, action="store_true", - help="do negative sampling inside a batch") - parser.add_argument('--num_threads', default=2, type=int, - help="number of threads used for each CPU-core/GPU") - parser.add_argument('--num_sampler_threads', default=2, type=int, - help="number of threads used for sampling") + parser.add_argument( + "--mix", + default=False, + action="store_true", + help="mixed training with CPU and GPU", + ) + parser.add_argument( + "--gpus", + type=int, + default=[-1], + nargs="+", + help="a list of active gpu ids, e.g. 0, used with --mix", + ) + parser.add_argument( + "--only_cpu", + default=False, + action="store_true", + help="training with CPU", + ) + parser.add_argument( + "--only_gpu", + default=False, + action="store_true", + help="training with a single GPU (all of the parameters are moved on the GPU)", + ) + parser.add_argument( + "--async_update", + default=False, + action="store_true", + help="mixed training asynchronously, recommend not to use this", + ) + + parser.add_argument( + "--fast_neg", + default=False, + action="store_true", + help="do negative sampling inside a batch", + ) + parser.add_argument( + "--num_threads", + default=2, + type=int, + help="number of threads used for each CPU-core/GPU", + ) + parser.add_argument( + "--num_sampler_threads", + default=2, + type=int, + help="number of threads used for sampling", + ) args = parser.parse_args() diff --git a/examples/pytorch/ogb/line/load_dataset.py b/examples/pytorch/ogb/line/load_dataset.py index 77bbd8382193..a4746d058660 100644 --- a/examples/pytorch/ogb/line/load_dataset.py +++ b/examples/pytorch/ogb/line/load_dataset.py @@ -1,30 +1,51 @@ """ load dataset from ogb """ import argparse + from ogb.linkproppred import DglLinkPropPredDataset from ogb.nodeproppred import DglNodePropPredDataset + import dgl -def load_from_ogbl_with_name(name): - choices = ['ogbl-collab', 'ogbl-ddi', 'ogbl-ppa', 'ogbl-citation'] + +def load_from_ogbl_with_name(name): + choices = ["ogbl-collab", "ogbl-ddi", "ogbl-ppa", "ogbl-citation"] assert name in choices, "name must be selected from " + str(choices) dataset = DglLinkPropPredDataset(name) return dataset[0] -def load_from_ogbn_with_name(name): - choices = ['ogbn-products', 'ogbn-proteins', 'ogbn-arxiv', 'ogbn-papers100M'] + +def load_from_ogbn_with_name(name): + choices = [ + "ogbn-products", + "ogbn-proteins", + "ogbn-arxiv", + "ogbn-papers100M", + ] assert name in choices, "name must be selected from " + str(choices) dataset, label = DglNodePropPredDataset(name)[0] return dataset + if __name__ == "__main__": - """ load datasets as net.txt format """ + """load datasets as net.txt format""" parser = argparse.ArgumentParser() - parser.add_argument('--name', type=str, - choices=['ogbl-collab', 'ogbl-ddi', 'ogbl-ppa', 'ogbl-citation', - 'ogbn-products', 'ogbn-proteins', 'ogbn-arxiv', 'ogbn-papers100M'], - default='ogbl-collab', - help="name of datasets by ogb") + parser.add_argument( + "--name", + type=str, + choices=[ + "ogbl-collab", + "ogbl-ddi", + "ogbl-ppa", + "ogbl-citation", + "ogbn-products", + "ogbn-proteins", + 
"ogbn-arxiv", + "ogbn-papers100M", + ], + default="ogbl-collab", + help="name of datasets by ogb", + ) args = parser.parse_args() name = args.name @@ -33,4 +54,4 @@ def load_from_ogbn_with_name(name): else: g = load_from_ogbn_with_name(name=name) - dgl.save_graphs(name + "-graph.bin", g) \ No newline at end of file + dgl.save_graphs(name + "-graph.bin", g) diff --git a/examples/pytorch/ogb/line/model.py b/examples/pytorch/ogb/line/model.py index a8d181cb11c0..d707f7647bd1 100644 --- a/examples/pytorch/ogb/line/model.py +++ b/examples/pytorch/ogb/line/model.py @@ -1,16 +1,18 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch.nn import init import random + import numpy as np +import torch import torch.multiprocessing as mp +import torch.nn as nn +import torch.nn.functional as F from torch.multiprocessing import Queue +from torch.nn import init + def init_emb2neg_index(negative, batch_size): - '''select embedding of negative nodes from a batch of node embeddings + """select embedding of negative nodes from a batch of node embeddings for fast negative sampling - + Return ------ index_emb_negu torch.LongTensor : the indices of u_embeddings @@ -20,7 +22,7 @@ def init_emb2neg_index(negative, batch_size): ----- # emb_u.shape: [batch_size, dim] batch_emb2negu = torch.index_select(emb_u, 0, index_emb_negu) - ''' + """ idx_list_u = list(range(batch_size)) * negative idx_list_v = list(range(batch_size)) * negative random.shuffle(idx_list_v) @@ -30,21 +32,22 @@ def init_emb2neg_index(negative, batch_size): return index_emb_negu, index_emb_negv + def adam(grad, state_sum, nodes, lr, device, only_gpu): - """ calculate gradients according to adam """ + """calculate gradients according to adam""" grad_sum = (grad * grad).mean(1) if not only_gpu: grad_sum = grad_sum.cpu() - state_sum.index_add_(0, nodes, grad_sum) # cpu + state_sum.index_add_(0, nodes, grad_sum) # cpu std = state_sum[nodes].to(device) # gpu std_values = std.sqrt_().add_(1e-10).unsqueeze(1) - grad = (lr * grad / std_values) # gpu + grad = lr * grad / std_values # gpu return grad + def async_update(num_threads, model, queue): - """ Asynchronous embedding update for entity embeddings. 
- """ + """Asynchronous embedding update for entity embeddings.""" torch.set_num_threads(num_threads) print("async start") while True: @@ -53,20 +56,35 @@ def async_update(num_threads, model, queue): return with torch.no_grad(): if first_flag: - model.fst_u_embeddings.weight.data.index_add_(0, nodes[:, 0], grad_u) - model.fst_u_embeddings.weight.data.index_add_(0, nodes[:, 1], grad_v) + model.fst_u_embeddings.weight.data.index_add_( + 0, nodes[:, 0], grad_u + ) + model.fst_u_embeddings.weight.data.index_add_( + 0, nodes[:, 1], grad_v + ) if neg_nodes is not None: - model.fst_u_embeddings.weight.data.index_add_(0, neg_nodes, grad_v_neg) + model.fst_u_embeddings.weight.data.index_add_( + 0, neg_nodes, grad_v_neg + ) else: - model.snd_u_embeddings.weight.data.index_add_(0, nodes[:, 0], grad_u) - model.snd_v_embeddings.weight.data.index_add_(0, nodes[:, 1], grad_v) + model.snd_u_embeddings.weight.data.index_add_( + 0, nodes[:, 0], grad_u + ) + model.snd_v_embeddings.weight.data.index_add_( + 0, nodes[:, 1], grad_v + ) if neg_nodes is not None: - model.snd_v_embeddings.weight.data.index_add_(0, neg_nodes, grad_v_neg) + model.snd_v_embeddings.weight.data.index_add_( + 0, neg_nodes, grad_v_neg + ) + class SkipGramModel(nn.Module): - """ Negative sampling based skip-gram """ - def __init__(self, - emb_size, + """Negative sampling based skip-gram""" + + def __init__( + self, + emb_size, emb_dimension, batch_size, only_cpu, @@ -82,8 +100,8 @@ def __init__(self, record_loss, async_update, num_threads, - ): - """ initialize embedding on CPU + ): + """initialize embedding on CPU Paremeters ---------- @@ -130,7 +148,7 @@ def __init__(self, self.record_loss = record_loss self.async_update = async_update self.num_threads = num_threads - + # initialize the device as cpu self.device = torch.device("cpu") @@ -138,27 +156,38 @@ def __init__(self, initrange = 1.0 / self.emb_dimension if self.fst: self.fst_u_embeddings = nn.Embedding( - self.emb_size, self.emb_dimension, sparse=True) - init.uniform_(self.fst_u_embeddings.weight.data, -initrange, initrange) + self.emb_size, self.emb_dimension, sparse=True + ) + init.uniform_( + self.fst_u_embeddings.weight.data, -initrange, initrange + ) if self.snd: self.snd_u_embeddings = nn.Embedding( - self.emb_size, self.emb_dimension, sparse=True) - init.uniform_(self.snd_u_embeddings.weight.data, -initrange, initrange) + self.emb_size, self.emb_dimension, sparse=True + ) + init.uniform_( + self.snd_u_embeddings.weight.data, -initrange, initrange + ) self.snd_v_embeddings = nn.Embedding( - self.emb_size, self.emb_dimension, sparse=True) + self.emb_size, self.emb_dimension, sparse=True + ) init.constant_(self.snd_v_embeddings.weight.data, 0) # lookup_table is used for fast sigmoid computing self.lookup_table = torch.sigmoid(torch.arange(-6.01, 6.01, 0.01)) - self.lookup_table[0] = 0. - self.lookup_table[-1] = 1. 
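The lookup tables initialized in this hunk let fast_sigmoid / fast_logsigmoid avoid calling torch.sigmoid on every score during training: scores are clamped to [-6, 6] elsewhere in the class and then mapped onto a pre-computed grid with step 0.01. A small, self-contained sketch of the trick follows; the comparison against torch.sigmoid at the end is only illustrative.

import torch

# pre-computed sigmoid values on a 0.01-step grid covering roughly [-6, 6]
lookup_table = torch.sigmoid(torch.arange(-6.01, 6.01, 0.01))
lookup_table[0] = 0.0  # saturate the two extremes, as in SkipGramModel
lookup_table[-1] = 1.0


def fast_sigmoid(score):
    # clamp into the table's range, then index instead of evaluating exp()
    score = torch.clamp(score, max=6, min=-6)
    idx = torch.floor((score + 6.01) / 0.01).long()
    return lookup_table[idx]


x = torch.randn(4)
print(fast_sigmoid(x))   # table lookup, accurate to about 1e-2
print(torch.sigmoid(x))  # exact reference values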
+ self.lookup_table[0] = 0.0 + self.lookup_table[-1] = 1.0 if self.record_loss: - self.logsigmoid_table = torch.log(torch.sigmoid(torch.arange(-6.01, 6.01, 0.01))) + self.logsigmoid_table = torch.log( + torch.sigmoid(torch.arange(-6.01, 6.01, 0.01)) + ) self.loss_fst = [] self.loss_snd = [] # indexes to select positive/negative node pairs from batch_walks - self.index_emb_negu, self.index_emb_negv = init_emb2neg_index(self.negative, self.batch_size) + self.index_emb_negu, self.index_emb_negv = init_emb2neg_index( + self.negative, self.batch_size + ) # adam if self.fst: @@ -168,20 +197,20 @@ def __init__(self, self.snd_state_sum_v = torch.zeros(self.emb_size) def create_async_update(self): - """ Set up the async update subprocess. - """ + """Set up the async update subprocess.""" self.async_q = Queue(1) - self.async_p = mp.Process(target=async_update, args=(self.num_threads, self, self.async_q)) + self.async_p = mp.Process( + target=async_update, args=(self.num_threads, self, self.async_q) + ) self.async_p.start() def finish_async_update(self): - """ Notify the async update subprocess to quit. - """ + """Notify the async update subprocess to quit.""" self.async_q.put((None, None, None, None, None)) self.async_p.join() def share_memory(self): - """ share the parameters across subprocesses """ + """share the parameters across subprocesses""" if self.fst: self.fst_u_embeddings.weight.share_memory_() self.fst_state_sum_u.share_memory_() @@ -192,7 +221,7 @@ def share_memory(self): self.snd_state_sum_v.share_memory_() def set_device(self, gpu_id): - """ set gpu device """ + """set gpu device""" self.device = torch.device("cuda:%d" % gpu_id) print("The device is", self.device) self.lookup_table = self.lookup_table.to(self.device) @@ -202,7 +231,7 @@ def set_device(self, gpu_id): self.index_emb_negv = self.index_emb_negv.to(self.device) def all_to_device(self, gpu_id): - """ move all of the parameters to a single GPU """ + """move all of the parameters to a single GPU""" self.device = torch.device("cuda:%d" % gpu_id) self.set_device(gpu_id) if self.fst: @@ -215,31 +244,39 @@ def all_to_device(self, gpu_id): self.snd_state_sum_v = self.snd_state_sum_v.to(self.device) def fast_sigmoid(self, score): - """ do fast sigmoid by looking up in a pre-defined table """ + """do fast sigmoid by looking up in a pre-defined table""" idx = torch.floor((score + 6.01) / 0.01).long() return self.lookup_table[idx] def fast_logsigmoid(self, score): - """ do fast logsigmoid by looking up in a pre-defined table """ + """do fast logsigmoid by looking up in a pre-defined table""" idx = torch.floor((score + 6.01) / 0.01).long() return self.logsigmoid_table[idx] def fast_pos_bp(self, emb_pos_u, emb_pos_v, first_flag): - """ get grad for positve samples """ + """get grad for positve samples""" pos_score = torch.sum(torch.mul(emb_pos_u, emb_pos_v), dim=1) pos_score = torch.clamp(pos_score, max=6, min=-6) # [batch_size, 1] score = (1 - self.fast_sigmoid(pos_score)).unsqueeze(1) if self.record_loss: if first_flag: - self.loss_fst.append(torch.mean(self.fast_logsigmoid(pos_score)).item()) + self.loss_fst.append( + torch.mean(self.fast_logsigmoid(pos_score)).item() + ) else: - self.loss_snd.append(torch.mean(self.fast_logsigmoid(pos_score)).item()) + self.loss_snd.append( + torch.mean(self.fast_logsigmoid(pos_score)).item() + ) # [batch_size, dim] if self.lap_norm > 0: - grad_u_pos = score * emb_pos_v + self.lap_norm * (emb_pos_v - emb_pos_u) - grad_v_pos = score * emb_pos_u + self.lap_norm * (emb_pos_u - emb_pos_v) + grad_u_pos = 
score * emb_pos_v + self.lap_norm * ( + emb_pos_v - emb_pos_u + ) + grad_v_pos = score * emb_pos_u + self.lap_norm * ( + emb_pos_u - emb_pos_v + ) else: grad_u_pos = score * emb_pos_v grad_v_pos = score * emb_pos_u @@ -247,16 +284,24 @@ def fast_pos_bp(self, emb_pos_u, emb_pos_v, first_flag): return grad_u_pos, grad_v_pos def fast_neg_bp(self, emb_neg_u, emb_neg_v, first_flag): - """ get grad for negative samples """ + """get grad for negative samples""" neg_score = torch.sum(torch.mul(emb_neg_u, emb_neg_v), dim=1) neg_score = torch.clamp(neg_score, max=6, min=-6) # [batch_size * negative, 1] - score = - self.fast_sigmoid(neg_score).unsqueeze(1) + score = -self.fast_sigmoid(neg_score).unsqueeze(1) if self.record_loss: if first_flag: - self.loss_fst.append(self.negative * self.neg_weight * torch.mean(self.fast_logsigmoid(-neg_score)).item()) + self.loss_fst.append( + self.negative + * self.neg_weight + * torch.mean(self.fast_logsigmoid(-neg_score)).item() + ) else: - self.loss_snd.append(self.negative * self.neg_weight * torch.mean(self.fast_logsigmoid(-neg_score)).item()) + self.loss_snd.append( + self.negative + * self.neg_weight + * torch.mean(self.fast_logsigmoid(-neg_score)).item() + ) grad_u_neg = self.neg_weight * score * emb_neg_v grad_v_neg = self.neg_weight * score * emb_neg_u @@ -264,7 +309,7 @@ def fast_neg_bp(self, emb_neg_u, emb_neg_v, first_flag): return grad_u_neg, grad_v_neg def fast_learn(self, batch_edges, neg_nodes=None): - """ Learn a batch of edges in a fast way. It has the following features: + """Learn a batch of edges in a fast way. It has the following features: 1. It calculating the gradients directly without the forward operation. 2. It does sigmoid by a looking up table. @@ -296,30 +341,46 @@ def fast_learn(self, batch_edges, neg_nodes=None): bs = len(nodes) if self.fst: - emb_u = self.fst_u_embeddings(nodes[:, 0]).view(-1, self.emb_dimension).to(self.device) - emb_v = self.fst_u_embeddings(nodes[:, 1]).view(-1, self.emb_dimension).to(self.device) + emb_u = ( + self.fst_u_embeddings(nodes[:, 0]) + .view(-1, self.emb_dimension) + .to(self.device) + ) + emb_v = ( + self.fst_u_embeddings(nodes[:, 1]) + .view(-1, self.emb_dimension) + .to(self.device) + ) ## Postive emb_pos_u, emb_pos_v = emb_u, emb_v - grad_u_pos, grad_v_pos = self.fast_pos_bp(emb_pos_u, emb_pos_v, True) + grad_u_pos, grad_v_pos = self.fast_pos_bp( + emb_pos_u, emb_pos_v, True + ) ## Negative emb_neg_u = emb_pos_u.repeat((self.negative, 1)) if bs < self.batch_size: - index_emb_negu, index_emb_negv = init_emb2neg_index(self.negative, bs) + index_emb_negu, index_emb_negv = init_emb2neg_index( + self.negative, bs + ) index_emb_negu = index_emb_negu.to(self.device) index_emb_negv = index_emb_negv.to(self.device) else: index_emb_negu = self.index_emb_negu index_emb_negv = self.index_emb_negv - + if neg_nodes is None: emb_neg_v = torch.index_select(emb_v, 0, index_emb_negv) else: - emb_neg_v = self.fst_u_embeddings.weight[neg_nodes].to(self.device) + emb_neg_v = self.fst_u_embeddings.weight[neg_nodes].to( + self.device + ) - grad_u_neg, grad_v_neg = self.fast_neg_bp(emb_neg_u, emb_neg_v, True) + grad_u_neg, grad_v_neg = self.fast_neg_bp( + emb_neg_u, emb_neg_v, True + ) ## Update grad_u_pos.index_add_(0, index_emb_negu, grad_u_neg) @@ -329,12 +390,33 @@ def fast_learn(self, batch_edges, neg_nodes=None): grad_v = grad_v_pos else: grad_v = grad_v_pos - + # use adam optimizer - grad_u = adam(grad_u, self.fst_state_sum_u, nodes[:, 0], lr, self.device, self.only_gpu) - grad_v = adam(grad_v, 
self.fst_state_sum_u, nodes[:, 1], lr, self.device, self.only_gpu) + grad_u = adam( + grad_u, + self.fst_state_sum_u, + nodes[:, 0], + lr, + self.device, + self.only_gpu, + ) + grad_v = adam( + grad_v, + self.fst_state_sum_u, + nodes[:, 1], + lr, + self.device, + self.only_gpu, + ) if neg_nodes is not None: - grad_v_neg = adam(grad_v_neg, self.fst_state_sum_u, neg_nodes, lr, self.device, self.only_gpu) + grad_v_neg = adam( + grad_v_neg, + self.fst_state_sum_u, + neg_nodes, + lr, + self.device, + self.only_gpu, + ) if self.mixed_train: grad_u = grad_u.cpu() @@ -351,27 +433,47 @@ def fast_learn(self, batch_edges, neg_nodes=None): if neg_nodes is not None: neg_nodes.share_memory_() grad_v_neg.share_memory_() - self.async_q.put((grad_u, grad_v, grad_v_neg, nodes, neg_nodes, True)) - + self.async_q.put( + (grad_u, grad_v, grad_v_neg, nodes, neg_nodes, True) + ) + if not self.async_update: - self.fst_u_embeddings.weight.data.index_add_(0, nodes[:, 0], grad_u) - self.fst_u_embeddings.weight.data.index_add_(0, nodes[:, 1], grad_v) + self.fst_u_embeddings.weight.data.index_add_( + 0, nodes[:, 0], grad_u + ) + self.fst_u_embeddings.weight.data.index_add_( + 0, nodes[:, 1], grad_v + ) if neg_nodes is not None: - self.fst_u_embeddings.weight.data.index_add_(0, neg_nodes, grad_v_neg) + self.fst_u_embeddings.weight.data.index_add_( + 0, neg_nodes, grad_v_neg + ) if self.snd: - emb_u = self.snd_u_embeddings(nodes[:, 0]).view(-1, self.emb_dimension).to(self.device) - emb_v = self.snd_v_embeddings(nodes[:, 1]).view(-1, self.emb_dimension).to(self.device) + emb_u = ( + self.snd_u_embeddings(nodes[:, 0]) + .view(-1, self.emb_dimension) + .to(self.device) + ) + emb_v = ( + self.snd_v_embeddings(nodes[:, 1]) + .view(-1, self.emb_dimension) + .to(self.device) + ) ## Postive emb_pos_u, emb_pos_v = emb_u, emb_v - grad_u_pos, grad_v_pos = self.fast_pos_bp(emb_pos_u, emb_pos_v, False) + grad_u_pos, grad_v_pos = self.fast_pos_bp( + emb_pos_u, emb_pos_v, False + ) ## Negative emb_neg_u = emb_pos_u.repeat((self.negative, 1)) if bs < self.batch_size: - index_emb_negu, index_emb_negv = init_emb2neg_index(self.negative, bs) + index_emb_negu, index_emb_negv = init_emb2neg_index( + self.negative, bs + ) index_emb_negu = index_emb_negu.to(self.device) index_emb_negv = index_emb_negv.to(self.device) else: @@ -381,9 +483,13 @@ def fast_learn(self, batch_edges, neg_nodes=None): if neg_nodes is None: emb_neg_v = torch.index_select(emb_v, 0, index_emb_negv) else: - emb_neg_v = self.snd_v_embeddings.weight[neg_nodes].to(self.device) + emb_neg_v = self.snd_v_embeddings.weight[neg_nodes].to( + self.device + ) - grad_u_neg, grad_v_neg = self.fast_neg_bp(emb_neg_u, emb_neg_v, False) + grad_u_neg, grad_v_neg = self.fast_neg_bp( + emb_neg_u, emb_neg_v, False + ) ## Update grad_u_pos.index_add_(0, index_emb_negu, grad_u_neg) @@ -393,12 +499,33 @@ def fast_learn(self, batch_edges, neg_nodes=None): grad_v = grad_v_pos else: grad_v = grad_v_pos - + # use adam optimizer - grad_u = adam(grad_u, self.snd_state_sum_u, nodes[:, 0], lr, self.device, self.only_gpu) - grad_v = adam(grad_v, self.snd_state_sum_v, nodes[:, 1], lr, self.device, self.only_gpu) + grad_u = adam( + grad_u, + self.snd_state_sum_u, + nodes[:, 0], + lr, + self.device, + self.only_gpu, + ) + grad_v = adam( + grad_v, + self.snd_state_sum_v, + nodes[:, 1], + lr, + self.device, + self.only_gpu, + ) if neg_nodes is not None: - grad_v_neg = adam(grad_v_neg, self.snd_state_sum_v, neg_nodes, lr, self.device, self.only_gpu) + grad_v_neg = adam( + grad_v_neg, + self.snd_state_sum_v, + 
neg_nodes, + lr, + self.device, + self.only_gpu, + ) if self.mixed_train: grad_u = grad_u.cpu() @@ -415,37 +542,51 @@ def fast_learn(self, batch_edges, neg_nodes=None): if neg_nodes is not None: neg_nodes.share_memory_() grad_v_neg.share_memory_() - self.async_q.put((grad_u, grad_v, grad_v_neg, nodes, neg_nodes, False)) - + self.async_q.put( + (grad_u, grad_v, grad_v_neg, nodes, neg_nodes, False) + ) + if not self.async_update: - self.snd_u_embeddings.weight.data.index_add_(0, nodes[:, 0], grad_u) - self.snd_v_embeddings.weight.data.index_add_(0, nodes[:, 1], grad_v) + self.snd_u_embeddings.weight.data.index_add_( + 0, nodes[:, 0], grad_u + ) + self.snd_v_embeddings.weight.data.index_add_( + 0, nodes[:, 1], grad_v + ) if neg_nodes is not None: - self.snd_v_embeddings.weight.data.index_add_(0, neg_nodes, grad_v_neg) + self.snd_v_embeddings.weight.data.index_add_( + 0, neg_nodes, grad_v_neg + ) return def get_embedding(self): if self.fst: embedding_fst = self.fst_u_embeddings.weight.cpu().data.numpy() - embedding_fst /= np.sqrt(np.sum(embedding_fst * embedding_fst, 1)).reshape(-1, 1) + embedding_fst /= np.sqrt( + np.sum(embedding_fst * embedding_fst, 1) + ).reshape(-1, 1) if self.snd: embedding_snd = self.snd_u_embeddings.weight.cpu().data.numpy() - embedding_snd /= np.sqrt(np.sum(embedding_snd * embedding_snd, 1)).reshape(-1, 1) + embedding_snd /= np.sqrt( + np.sum(embedding_snd * embedding_snd, 1) + ).reshape(-1, 1) if self.fst and self.snd: embedding = np.concatenate((embedding_fst, embedding_snd), 1) - embedding /= np.sqrt(np.sum(embedding * embedding, 1)).reshape(-1, 1) + embedding /= np.sqrt(np.sum(embedding * embedding, 1)).reshape( + -1, 1 + ) elif self.fst and not self.snd: embedding = embedding_fst elif self.snd and not self.fst: embedding = embedding_snd else: pass - + return embedding def save_embedding(self, dataset, file_name): - """ Write embedding to local file. Only used when node ids are numbers. + """Write embedding to local file. Only used when node ids are numbers. Parameter --------- @@ -456,7 +597,7 @@ def save_embedding(self, dataset, file_name): np.save(file_name, embedding) def save_embedding_pt(self, dataset, file_name): - """ For ogb leaderboard. """ + """For ogb leaderboard.""" embedding = torch.Tensor(self.get_embedding()).cpu() embedding_empty = torch.zeros_like(embedding.data) valid_nodes = torch.LongTensor(dataset.valid_nodes) diff --git a/examples/pytorch/ogb/line/reading_data.py b/examples/pytorch/ogb/line/reading_data.py index a963506504ef..5a7bc36cfb8b 100644 --- a/examples/pytorch/ogb/line/reading_data.py +++ b/examples/pytorch/ogb/line/reading_data.py @@ -1,16 +1,24 @@ import os +import pickle +import random +import time + import numpy as np import scipy.sparse as sp -import pickle import torch from torch.utils.data import DataLoader -from dgl.data.utils import download, _get_dgl_url, get_download_dir, extract_archive -import random -import time + import dgl +from dgl.data.utils import ( + _get_dgl_url, + download, + extract_archive, + get_download_dir, +) + def ReadTxtNet(file_path="", undirected=True): - """ Read the txt network file. + """Read the txt network file. Notations: The network is unweighted. 
Parameters @@ -21,16 +29,20 @@ def ReadTxtNet(file_path="", undirected=True): Return ------ net dict : a dict recording the connections in the graph - node2id dict : a dict mapping the nodes to their embedding indices + node2id dict : a dict mapping the nodes to their embedding indices id2node dict : a dict mapping nodes embedding indices to the nodes """ - if file_path == 'youtube' or file_path == 'blog': + if file_path == "youtube" or file_path == "blog": name = file_path dir = get_download_dir() - zip_file_path='{}/{}.zip'.format(dir, name) - download(_get_dgl_url(os.path.join('dataset/DeepWalk/', '{}.zip'.format(file_path))), path=zip_file_path) - extract_archive(zip_file_path, - '{}/{}'.format(dir, name)) + zip_file_path = "{}/{}.zip".format(dir, name) + download( + _get_dgl_url( + os.path.join("dataset/DeepWalk/", "{}.zip".format(file_path)) + ), + path=zip_file_path, + ) + extract_archive(zip_file_path, "{}/{}".format(dir, name)) file_path = "{}/{}/{}-net.txt".format(dir, name, name) node2id = {} @@ -44,7 +56,10 @@ def ReadTxtNet(file_path="", undirected=True): with open(file_path, "r") as f: for line in f.readlines(): tup = list(map(int, line.strip().split(" "))) - assert len(tup) in [2, 3], "The format of network file is unrecognizable." + assert len(tup) in [ + 2, + 3, + ], "The format of network file is unrecognizable." if len(tup) == 3: n1, n2, w = tup elif len(tup) == 2: @@ -71,7 +86,7 @@ def ReadTxtNet(file_path="", undirected=True): src.append(n1) dst.append(n2) weight.append(w) - + if undirected: if n2 not in net: net[n2] = {n1: w} @@ -88,16 +103,15 @@ def ReadTxtNet(file_path="", undirected=True): print("edge num: %d" % len(src)) assert max(net.keys()) == len(net) - 1, "error reading net, quit" - sm = sp.coo_matrix( - (np.array(weight), (src, dst)), - dtype=np.float32) + sm = sp.coo_matrix((np.array(weight), (src, dst)), dtype=np.float32) return net, node2id, id2node, sm + def net2graph(net_sm): - """ Transform the network to DGL graph + """Transform the network to DGL graph - Return + Return ------ G DGLGraph : graph by DGL """ @@ -108,29 +122,33 @@ def net2graph(net_sm): print("Building DGLGraph in %.2fs" % t) return G + def make_undirected(G): - #G.readonly(False) + # G.readonly(False) G.add_edges(G.edges()[1], G.edges()[0]) return G + def find_connected_nodes(G): nodes = torch.nonzero(G.out_degrees(), as_tuple=False).squeeze(-1) return nodes + class LineDataset: - def __init__(self, - net_file, - batch_size, - num_samples, - negative=5, - gpus=[0], - fast_neg=True, - ogbl_name="", - load_from_ogbl=False, - ogbn_name="", - load_from_ogbn=False, - ): - """ This class has the following functions: + def __init__( + self, + net_file, + batch_size, + num_samples, + negative=5, + gpus=[0], + fast_neg=True, + ogbl_name="", + load_from_ogbl=False, + ogbn_name="", + load_from_ogbn=False, + ): + """This class has the following functions: 1. Transform the txt network file into DGL graph; 2. Generate random walk sequences for the trainer; 3. Provide the negative table if the user hopes to sample negative @@ -153,12 +171,18 @@ def __init__(self, self.fast_neg = fast_neg if load_from_ogbl: - assert len(gpus) == 1, "ogb.linkproppred is not compatible with multi-gpu training." + assert ( + len(gpus) == 1 + ), "ogb.linkproppred is not compatible with multi-gpu training." from load_dataset import load_from_ogbl_with_name + self.G = load_from_ogbl_with_name(ogbl_name) elif load_from_ogbn: - assert len(gpus) == 1, "ogb.linkproppred is not compatible with multi-gpu training." 
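Point 3 of the LineDataset docstring above (the negative table built when fast_neg is enabled) repeats each connected node in a flat list proportionally to its out-degree, so that uniform draws from the list yield degree-proportional negative samples; the trainer then pulls bs * negative entries per batch via np.random.choice. A condensed sketch of that mechanism, using a scale factor of 100 instead of the 1e8 used in this file so the toy table stays small:

import numpy as np
import torch

import dgl

# toy graph: node 0 has out-degree 3, node 1 has out-degree 1
g = dgl.graph(([0, 0, 0, 1], [1, 2, 3, 2]))

valid_nodes = torch.nonzero(g.out_degrees(), as_tuple=False).squeeze(-1)
node_degree = g.out_degrees(valid_nodes).float().numpy()
node_degree /= np.sum(node_degree)
node_degree = np.array(node_degree * 100, dtype=np.int64)

neg_table = []
for idx, node in enumerate(valid_nodes.tolist()):
    neg_table += [node] * int(node_degree[idx])
# neg_table now holds ~75 copies of node 0 and ~25 copies of node 1

# the trainer samples negatives uniformly from this table each batch:
bs, negative = 8, 5
neg_nodes = torch.LongTensor(
    np.random.choice(neg_table, bs * negative, replace=True)
)
print(neg_nodes.shape)  # torch.Size([40])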
+ assert ( + len(gpus) == 1 + ), "ogb.linkproppred is not compatible with multi-gpu training." from load_dataset import load_from_ogbn_with_name + self.G = load_from_ogbn_with_name(ogbn_name) else: self.G = dgl.load_graphs(net_file)[0][0] @@ -168,12 +192,14 @@ def __init__(self, self.num_nodes = self.G.number_of_nodes() start = time.time() - seeds = np.random.choice(np.arange(self.G.number_of_edges()), - self.num_samples, - replace=True) # edge index - self.seeds = torch.split(torch.LongTensor(seeds), - int(np.ceil(self.num_samples / self.num_procs)), - 0) + seeds = np.random.choice( + np.arange(self.G.number_of_edges()), self.num_samples, replace=True + ) # edge index + self.seeds = torch.split( + torch.LongTensor(seeds), + int(np.ceil(self.num_samples / self.num_procs)), + 0, + ) end = time.time() t = end - start print("generate %d samples in %.2fs" % (len(seeds), t)) @@ -186,7 +212,7 @@ def __init__(self, node_degree /= np.sum(node_degree) node_degree = np.array(node_degree * 1e8, dtype=np.int) self.neg_table = [] - + for idx, node in enumerate(self.valid_nodes): self.neg_table += [node] * node_degree[idx] self.neg_table_size = len(self.neg_table) @@ -194,19 +220,22 @@ def __init__(self, del node_degree def create_sampler(self, i): - """ create random walk sampler """ + """create random walk sampler""" return EdgeSampler(self.G, self.seeds[i]) def save_mapping(self, map_file): with open(map_file, "wb") as f: pickle.dump(self.node2id, f) + class EdgeSampler(object): def __init__(self, G, seeds): self.G = G self.seeds = seeds - self.edges = torch.cat((self.G.edges()[0].unsqueeze(0), self.G.edges()[1].unsqueeze(0)), 0).t() - + self.edges = torch.cat( + (self.G.edges()[0].unsqueeze(0), self.G.edges()[1].unsqueeze(0)), 0 + ).t() + def sample(self, seeds): - """ seeds torch.LongTensor : a batch of indices of edges """ + """seeds torch.LongTensor : a batch of indices of edges""" return self.edges[torch.LongTensor(seeds)] diff --git a/examples/pytorch/ogb/line/utils.py b/examples/pytorch/ogb/line/utils.py index 521aab763211..1bdddcd62d4d 100644 --- a/examples/pytorch/ogb/line/utils.py +++ b/examples/pytorch/ogb/line/utils.py @@ -1,15 +1,19 @@ import torch + def check_args(args): flag = sum([args.only_1st, args.only_2nd]) - assert flag <= 1, "no more than one selection from --only_1st and --only_2nd" + assert ( + flag <= 1 + ), "no more than one selection from --only_1st and --only_2nd" if flag == 0: assert args.dim % 2 == 0, "embedding dimension must be an even number" if args.async_update: assert args.mix, "please use --async_update with --mix" + def sum_up_params(model): - """ Count the model parameters """ + """Count the model parameters""" n = [] if model.fst: p = model.fst_u_embeddings.weight.cpu().data.numel() diff --git a/examples/pytorch/ogb/ngnn/main.py b/examples/pytorch/ogb/ngnn/main.py index d93693a8c2fa..5c1aff4dcd47 100644 --- a/examples/pytorch/ogb/ngnn/main.py +++ b/examples/pytorch/ogb/ngnn/main.py @@ -3,14 +3,14 @@ import torch import torch.nn.functional as F +from ogb.linkproppred import DglLinkPropPredDataset, Evaluator from torch.nn import Linear from torch.utils.data import DataLoader import dgl -from dgl.nn.pytorch import GraphConv, SAGEConv from dgl.dataloading.negative_sampler import GlobalUniform +from dgl.nn.pytorch import GraphConv, SAGEConv -from ogb.linkproppred import DglLinkPropPredDataset, Evaluator class Logger(object): def __init__(self, runs, info=None): @@ -56,9 +56,13 @@ def print_statistics(self, run=None): class NGNN_GCNConv(torch.nn.Module): - def 
__init__(self, in_channels, hidden_channels, out_channels, num_nonl_layers): + def __init__( + self, in_channels, hidden_channels, out_channels, num_nonl_layers + ): super(NGNN_GCNConv, self).__init__() - self.num_nonl_layers = num_nonl_layers # number of nonlinear layers in each conv layer + self.num_nonl_layers = ( + num_nonl_layers # number of nonlinear layers in each conv layer + ) self.conv = GraphConv(in_channels, hidden_channels) self.fc = Linear(hidden_channels, hidden_channels) self.fc2 = Linear(hidden_channels, out_channels) @@ -66,7 +70,7 @@ def __init__(self, in_channels, hidden_channels, out_channels, num_nonl_layers): def reset_parameters(self): self.conv.reset_parameters() - gain = torch.nn.init.calculate_gain('relu') + gain = torch.nn.init.calculate_gain("relu") torch.nn.init.xavier_uniform_(self.fc.weight, gain=gain) torch.nn.init.xavier_uniform_(self.fc2.weight, gain=gain) for bias in [self.fc.bias, self.fc2.bias]: @@ -79,28 +83,54 @@ def forward(self, g, x): if self.num_nonl_layers == 2: x = F.relu(x) x = self.fc(x) - + x = F.relu(x) x = self.fc2(x) return x + class GCN(torch.nn.Module): - def __init__(self, in_channels, hidden_channels, out_channels, num_layers, dropout, ngnn_type, dataset): + def __init__( + self, + in_channels, + hidden_channels, + out_channels, + num_layers, + dropout, + ngnn_type, + dataset, + ): super(GCN, self).__init__() self.dataset = dataset self.convs = torch.nn.ModuleList() - num_nonl_layers = 1 if num_layers <= 2 else 2 # number of nonlinear layers in each conv layer - if ngnn_type == 'input': - self.convs.append(NGNN_GCNConv(in_channels, hidden_channels, hidden_channels, num_nonl_layers)) + num_nonl_layers = ( + 1 if num_layers <= 2 else 2 + ) # number of nonlinear layers in each conv layer + if ngnn_type == "input": + self.convs.append( + NGNN_GCNConv( + in_channels, + hidden_channels, + hidden_channels, + num_nonl_layers, + ) + ) for _ in range(num_layers - 2): self.convs.append(GraphConv(hidden_channels, hidden_channels)) - elif ngnn_type == 'hidden': + elif ngnn_type == "hidden": self.convs.append(GraphConv(in_channels, hidden_channels)) for _ in range(num_layers - 2): - self.convs.append(NGNN_GCNConv(hidden_channels, hidden_channels, hidden_channels, num_nonl_layers)) - + self.convs.append( + NGNN_GCNConv( + hidden_channels, + hidden_channels, + hidden_channels, + num_nonl_layers, + ) + ) + self.convs.append(GraphConv(hidden_channels, out_channels)) self.dropout = dropout @@ -120,10 +150,19 @@ def forward(self, g, x): class NGNN_SAGEConv(torch.nn.Module): - def __init__(self, in_channels, hidden_channels, out_channels, num_nonl_layers, - *, reduce): + def __init__( + self, + in_channels, + hidden_channels, + out_channels, + num_nonl_layers, + *, + reduce, + ): super(NGNN_SAGEConv, self).__init__() - self.num_nonl_layers = num_nonl_layers # number of nonlinear layers in each conv layer + self.num_nonl_layers = ( + num_nonl_layers # number of nonlinear layers in each conv layer + ) self.conv = SAGEConv(in_channels, hidden_channels, reduce) self.fc = Linear(hidden_channels, hidden_channels) self.fc2 = Linear(hidden_channels, out_channels) @@ -131,7 +170,7 @@ def __init__(self, in_channels, hidden_channels, out_channels, num_nonl_layers, def reset_parameters(self): self.conv.reset_parameters() - gain = torch.nn.init.calculate_gain('relu') + gain = torch.nn.init.calculate_gain("relu") torch.nn.init.xavier_uniform_(self.fc.weight, gain=gain) torch.nn.init.xavier_uniform_(self.fc2.weight, gain=gain) for bias in [self.fc.bias, self.fc2.bias]: 
@@ -144,28 +183,59 @@ def forward(self, g, x): if self.num_nonl_layers == 2: x = F.relu(x) x = self.fc(x) - + x = F.relu(x) x = self.fc2(x) return x + class SAGE(torch.nn.Module): - def __init__(self, in_channels, hidden_channels, out_channels, num_layers, dropout, ngnn_type, dataset, reduce='mean'): + def __init__( + self, + in_channels, + hidden_channels, + out_channels, + num_layers, + dropout, + ngnn_type, + dataset, + reduce="mean", + ): super(SAGE, self).__init__() self.dataset = dataset self.convs = torch.nn.ModuleList() - - num_nonl_layers = 1 if num_layers <= 2 else 2 # number of nonlinear layers in each conv layer - if ngnn_type == 'input': - self.convs.append(NGNN_SAGEConv(in_channels, hidden_channels, hidden_channels, num_nonl_layers, reduce=reduce)) + + num_nonl_layers = ( + 1 if num_layers <= 2 else 2 + ) # number of nonlinear layers in each conv layer + if ngnn_type == "input": + self.convs.append( + NGNN_SAGEConv( + in_channels, + hidden_channels, + hidden_channels, + num_nonl_layers, + reduce=reduce, + ) + ) for _ in range(num_layers - 2): - self.convs.append(SAGEConv(hidden_channels, hidden_channels, reduce)) - elif ngnn_type == 'hidden': + self.convs.append( + SAGEConv(hidden_channels, hidden_channels, reduce) + ) + elif ngnn_type == "hidden": self.convs.append(SAGEConv(in_channels, hidden_channels, reduce)) for _ in range(num_layers - 2): - self.convs.append(NGNN_SAGEConv(hidden_channels, hidden_channels, hidden_channels, num_nonl_layers, reduce=reduce)) - + self.convs.append( + NGNN_SAGEConv( + hidden_channels, + hidden_channels, + hidden_channels, + num_nonl_layers, + reduce=reduce, + ) + ) + self.convs.append(SAGEConv(hidden_channels, out_channels, reduce)) self.dropout = dropout @@ -185,7 +255,9 @@ def forward(self, g, x): class LinkPredictor(torch.nn.Module): - def __init__(self, in_channels, hidden_channels, out_channels, num_layers, dropout): + def __init__( + self, in_channels, hidden_channels, out_channels, num_layers, dropout + ): super(LinkPredictor, self).__init__() self.lins = torch.nn.ModuleList() @@ -215,11 +287,12 @@ def train(model, predictor, g, x, split_edge, optimizer, batch_size): model.train() predictor.train() - pos_train_edge = split_edge['train']['edge'].to(x.device) + pos_train_edge = split_edge["train"]["edge"].to(x.device) neg_sampler = GlobalUniform(1) total_loss = total_examples = 0 - for perm in DataLoader(range(pos_train_edge.size(0)), batch_size, - shuffle=True): + for perm in DataLoader( + range(pos_train_edge.size(0)), batch_size, shuffle=True + ): optimizer.zero_grad() h = model(g, x) @@ -237,7 +310,7 @@ def train(model, predictor, g, x, split_edge, optimizer, batch_size): loss = pos_loss + neg_loss loss.backward() - if model.dataset == 'ogbl-ddi': + if model.dataset == "ogbl-ddi": torch.nn.utils.clip_grad_norm_(x, 1.0) torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0) torch.nn.utils.clip_grad_norm_(predictor.parameters(), 1.0) @@ -258,11 +331,11 @@ def test(model, predictor, g, x, split_edge, evaluator, batch_size): h = model(g, x) - pos_train_edge = split_edge['eval_train']['edge'].to(h.device) - pos_valid_edge = split_edge['valid']['edge'].to(h.device) - neg_valid_edge = split_edge['valid']['edge_neg'].to(h.device) - pos_test_edge = split_edge['test']['edge'].to(h.device) - neg_test_edge = split_edge['test']['edge_neg'].to(h.device) + pos_train_edge = split_edge["eval_train"]["edge"].to(h.device) + pos_valid_edge = split_edge["valid"]["edge"].to(h.device) + neg_valid_edge = split_edge["valid"]["edge_neg"].to(h.device) + 
pos_test_edge = split_edge["test"]["edge"].to(h.device) + neg_test_edge = split_edge["test"]["edge_neg"].to(h.device) def get_pred(test_edges, h): preds = [] @@ -271,7 +344,7 @@ def get_pred(test_edges, h): preds += [predictor(h[edge[0]], h[edge[1]]).squeeze().cpu()] pred = torch.cat(preds, dim=0) return pred - + pos_train_pred = get_pred(pos_train_edge, h) pos_valid_pred = get_pred(pos_valid_edge, h) neg_valid_pred = get_pred(neg_valid_edge, h) @@ -281,50 +354,84 @@ def get_pred(test_edges, h): results = {} for K in [20, 50, 100]: evaluator.K = K - train_hits = evaluator.eval({ - 'y_pred_pos': pos_train_pred, - 'y_pred_neg': neg_valid_pred, - })[f'hits@{K}'] - valid_hits = evaluator.eval({ - 'y_pred_pos': pos_valid_pred, - 'y_pred_neg': neg_valid_pred, - })[f'hits@{K}'] - test_hits = evaluator.eval({ - 'y_pred_pos': pos_test_pred, - 'y_pred_neg': neg_test_pred, - })[f'hits@{K}'] - - results[f'Hits@{K}'] = (train_hits, valid_hits, test_hits) + train_hits = evaluator.eval( + { + "y_pred_pos": pos_train_pred, + "y_pred_neg": neg_valid_pred, + } + )[f"hits@{K}"] + valid_hits = evaluator.eval( + { + "y_pred_pos": pos_valid_pred, + "y_pred_neg": neg_valid_pred, + } + )[f"hits@{K}"] + test_hits = evaluator.eval( + { + "y_pred_pos": pos_test_pred, + "y_pred_neg": neg_test_pred, + } + )[f"hits@{K}"] + + results[f"Hits@{K}"] = (train_hits, valid_hits, test_hits) return results def main(): - parser = argparse.ArgumentParser(description='OGBL(Full Batch GCN/GraphSage + NGNN)') + parser = argparse.ArgumentParser( + description="OGBL(Full Batch GCN/GraphSage + NGNN)" + ) # dataset setting - parser.add_argument('--dataset', type=str, default='ogbl-ddi', choices=['ogbl-ddi', 'ogbl-collab', 'ogbl-ppa']) + parser.add_argument( + "--dataset", + type=str, + default="ogbl-ddi", + choices=["ogbl-ddi", "ogbl-collab", "ogbl-ppa"], + ) # device setting - parser.add_argument('--device', type=int, default=0, help='GPU device ID. Use -1 for CPU training.') + parser.add_argument( + "--device", + type=int, + default=0, + help="GPU device ID. 
Use -1 for CPU training.", + ) # model structure settings - parser.add_argument('--use_sage', action='store_true', help='If not set, use GCN by default.') - parser.add_argument('--ngnn_type', type=str, default="input", choices=['input', 'hidden'], help="You can set this value from 'input' or 'hidden' to apply NGNN to different GNN layers.") - parser.add_argument('--num_layers', type=int, default=3, help='number of GNN layers') - parser.add_argument('--hidden_channels', type=int, default=256) - parser.add_argument('--dropout', type=float, default=0.0) - parser.add_argument('--batch_size', type=int, default=64 * 1024) - parser.add_argument('--lr', type=float, default=0.001) - parser.add_argument('--epochs', type=int, default=400) + parser.add_argument( + "--use_sage", + action="store_true", + help="If not set, use GCN by default.", + ) + parser.add_argument( + "--ngnn_type", + type=str, + default="input", + choices=["input", "hidden"], + help="You can set this value from 'input' or 'hidden' to apply NGNN to different GNN layers.", + ) + parser.add_argument( + "--num_layers", type=int, default=3, help="number of GNN layers" + ) + parser.add_argument("--hidden_channels", type=int, default=256) + parser.add_argument("--dropout", type=float, default=0.0) + parser.add_argument("--batch_size", type=int, default=64 * 1024) + parser.add_argument("--lr", type=float, default=0.001) + parser.add_argument("--epochs", type=int, default=400) # training settings - parser.add_argument('--eval_steps', type=int, default=1) - parser.add_argument('--runs', type=int, default=10) + parser.add_argument("--eval_steps", type=int, default=1) + parser.add_argument("--runs", type=int, default=10) args = parser.parse_args() print(args) - device = f'cuda:{args.device}' if args.device != -1 and torch.cuda.is_available() else 'cpu' + device = ( + f"cuda:{args.device}" + if args.device != -1 and torch.cuda.is_available() + else "cpu" + ) device = torch.device(device) dataset = DglLinkPropPredDataset(name=args.dataset) @@ -332,70 +439,101 @@ def main(): split_edge = dataset.get_edge_split() # We randomly pick some training samples that we want to evaluate on: - idx = torch.randperm(split_edge['train']['edge'].size(0)) - idx = idx[:split_edge['valid']['edge'].size(0)] - split_edge['eval_train'] = {'edge': split_edge['train']['edge'][idx]} + idx = torch.randperm(split_edge["train"]["edge"].size(0)) + idx = idx[: split_edge["valid"]["edge"].size(0)] + split_edge["eval_train"] = {"edge": split_edge["train"]["edge"][idx]} - if dataset.name == 'ogbl-ppa': - g.ndata['feat'] = g.ndata['feat'].to(torch.float) + if dataset.name == "ogbl-ppa": + g.ndata["feat"] = g.ndata["feat"].to(torch.float) - if dataset.name == 'ogbl-ddi': + if dataset.name == "ogbl-ddi": emb = torch.nn.Embedding(g.num_nodes(), args.hidden_channels).to(device) in_channels = args.hidden_channels - else: # ogbl-collab, ogbl-ppa - in_channels = g.ndata['feat'].size(-1) - + else: # ogbl-collab, ogbl-ppa + in_channels = g.ndata["feat"].size(-1) + # select model if args.use_sage: - model = SAGE(in_channels, args.hidden_channels, - args.hidden_channels, args.num_layers, - args.dropout, args.ngnn_type, dataset.name) - else: # GCN + model = SAGE( + in_channels, + args.hidden_channels, + args.hidden_channels, + args.num_layers, + args.dropout, + args.ngnn_type, + dataset.name, + ) + else: # GCN g = dgl.add_self_loop(g) - model = GCN(in_channels, args.hidden_channels, - args.hidden_channels, args.num_layers, - args.dropout, args.ngnn_type, dataset.name) - - predictor = 
LinkPredictor(args.hidden_channels, args.hidden_channels, 1, 3, args.dropout) + model = GCN( + in_channels, + args.hidden_channels, + args.hidden_channels, + args.num_layers, + args.dropout, + args.ngnn_type, + dataset.name, + ) + + predictor = LinkPredictor( + args.hidden_channels, args.hidden_channels, 1, 3, args.dropout + ) g, model, predictor = map(lambda x: x.to(device), (g, model, predictor)) evaluator = Evaluator(name=dataset.name) loggers = { - 'Hits@20': Logger(args.runs, args), - 'Hits@50': Logger(args.runs, args), - 'Hits@100': Logger(args.runs, args), + "Hits@20": Logger(args.runs, args), + "Hits@50": Logger(args.runs, args), + "Hits@100": Logger(args.runs, args), } for run in range(args.runs): model.reset_parameters() predictor.reset_parameters() - if dataset.name == 'ogbl-ddi': + if dataset.name == "ogbl-ddi": torch.nn.init.xavier_uniform_(emb.weight) - g.ndata['feat'] = emb.weight + g.ndata["feat"] = emb.weight optimizer = torch.optim.Adam( - list(model.parameters()) + list(predictor.parameters()) + ( - list(emb.parameters()) if dataset.name == 'ogbl-ddi' else [] - ), - lr=args.lr) + list(model.parameters()) + + list(predictor.parameters()) + + (list(emb.parameters()) if dataset.name == "ogbl-ddi" else []), + lr=args.lr, + ) for epoch in range(1, 1 + args.epochs): - loss = train(model, predictor, g, g.ndata['feat'], split_edge, optimizer, - args.batch_size) + loss = train( + model, + predictor, + g, + g.ndata["feat"], + split_edge, + optimizer, + args.batch_size, + ) if epoch % args.eval_steps == 0: - results = test(model, predictor, g, g.ndata['feat'], split_edge, evaluator, - args.batch_size) + results = test( + model, + predictor, + g, + g.ndata["feat"], + split_edge, + evaluator, + args.batch_size, + ) for key, result in results.items(): loggers[key].add_result(run, result) train_hits, valid_hits, test_hits = result print(key) - print(f'Run: {run + 1:02d}, ' - f'Epoch: {epoch:02d}, ' - f'Loss: {loss:.4f}, ' - f'Train: {100 * train_hits:.2f}%, ' - f'Valid: {100 * valid_hits:.2f}%, ' - f'Test: {100 * test_hits:.2f}%') - print('---') + print( + f"Run: {run + 1:02d}, " + f"Epoch: {epoch:02d}, " + f"Loss: {loss:.4f}, " + f"Train: {100 * train_hits:.2f}%, " + f"Valid: {100 * valid_hits:.2f}%, " + f"Test: {100 * test_hits:.2f}%" + ) + print("---") for key in loggers.keys(): print(key) diff --git a/examples/pytorch/ogb/ogbn-arxiv/correct_and_smooth.py b/examples/pytorch/ogb/ogbn-arxiv/correct_and_smooth.py index 564aa108a712..a4b6059e586e 100644 --- a/examples/pytorch/ogb/ogbn-arxiv/correct_and_smooth.py +++ b/examples/pytorch/ogb/ogbn-arxiv/correct_and_smooth.py @@ -4,9 +4,10 @@ import numpy as np import torch import torch.nn.functional as F -from dgl import function as fn from ogb.nodeproppred import DglNodePropPredDataset, Evaluator +from dgl import function as fn + device = None dataset = "ogbn-arxiv" @@ -20,7 +21,11 @@ def load_data(dataset): evaluator = Evaluator(name=dataset) splitted_idx = data.get_idx_split() - train_idx, val_idx, test_idx = splitted_idx["train"], splitted_idx["valid"], splitted_idx["test"] + train_idx, val_idx, test_idx = ( + splitted_idx["train"], + splitted_idx["valid"], + splitted_idx["test"], + ) graph, labels = data[0] n_node_feats = graph.ndata["feat"].shape[1] @@ -46,7 +51,9 @@ def preprocess(graph): return graph -def general_outcome_correlation(graph, y0, n_prop=50, alpha=0.8, use_norm=False, post_step=None): +def general_outcome_correlation( + graph, y0, n_prop=50, alpha=0.8, use_norm=False, post_step=None +): with graph.local_scope(): y = y0 
for _ in range(n_prop): @@ -94,7 +101,9 @@ def run(args, graph, labels, pred, train_idx, val_idx, test_idx, evaluator): # dy = torch.zeros(graph.number_of_nodes(), n_classes, device=device) # dy[train_idx] = F.one_hot(labels[train_idx], n_classes).float().squeeze(1) - pred[train_idx] - _train_acc, val_acc, test_acc = evaluate(labels, y, train_idx, val_idx, test_idx, evaluator_wrapper) + _train_acc, val_acc, test_acc = evaluate( + labels, y, train_idx, val_idx, test_idx, evaluator_wrapper + ) # print("train acc:", _train_acc) print("original val acc:", val_acc) @@ -110,10 +119,16 @@ def run(args, graph, labels, pred, train_idx, val_idx, test_idx, evaluator): # y = y + args.alpha2 * smoothed_dy # .clamp(0, 1) smoothed_y = general_outcome_correlation( - graph, y, alpha=args.alpha, use_norm=args.use_norm, post_step=lambda x: x.clamp(0, 1) + graph, + y, + alpha=args.alpha, + use_norm=args.use_norm, + post_step=lambda x: x.clamp(0, 1), ) - _train_acc, val_acc, test_acc = evaluate(labels, smoothed_y, train_idx, val_idx, test_idx, evaluator_wrapper) + _train_acc, val_acc, test_acc = evaluate( + labels, smoothed_y, train_idx, val_idx, test_idx, evaluator_wrapper + ) # print("train acc:", _train_acc) print("val acc:", val_acc) @@ -126,11 +141,24 @@ def main(): global device argparser = argparse.ArgumentParser(description="implementation of C&S)") - argparser.add_argument("--cpu", action="store_true", help="CPU mode. This option overrides --gpu.") + argparser.add_argument( + "--cpu", + action="store_true", + help="CPU mode. This option overrides --gpu.", + ) argparser.add_argument("--gpu", type=int, default=0, help="GPU device ID.") - argparser.add_argument("--use-norm", action="store_true", help="Use symmetrically normalized adjacency matrix.") + argparser.add_argument( + "--use-norm", + action="store_true", + help="Use symmetrically normalized adjacency matrix.", + ) argparser.add_argument("--alpha", type=float, default=0.6, help="alpha") - argparser.add_argument("--pred-files", type=str, default="./output/*.pt", help="address of prediction files") + argparser.add_argument( + "--pred-files", + type=str, + default="./output/*.pt", + help="address of prediction files", + ) args = argparser.parse_args() if args.cpu: @@ -152,7 +180,9 @@ def main(): for pred_file in glob.iglob(args.pred_files): print("load:", pred_file) pred = torch.load(pred_file) - val_acc, test_acc = run(args, graph, labels, pred, train_idx, val_idx, test_idx, evaluator) + val_acc, test_acc = run( + args, graph, labels, pred, train_idx, val_idx, test_idx, evaluator + ) val_accs.append(val_acc) test_accs.append(test_acc) diff --git a/examples/pytorch/ogb/ogbn-arxiv/gat.py b/examples/pytorch/ogb/ogbn-arxiv/gat.py index f87e346fd573..26bff9607688 100644 --- a/examples/pytorch/ogb/ogbn-arxiv/gat.py +++ b/examples/pytorch/ogb/ogbn-arxiv/gat.py @@ -7,16 +7,16 @@ import random import time -import dgl import numpy as np import torch import torch.nn.functional as F import torch.optim as optim from matplotlib import pyplot as plt from matplotlib.ticker import AutoMinorLocator, MultipleLocator +from models import GAT from ogb.nodeproppred import DglNodePropPredDataset, Evaluator -from models import GAT +import dgl epsilon = 1 - math.log(2) @@ -44,7 +44,11 @@ def load_data(dataset): evaluator = Evaluator(name=dataset) splitted_idx = data.get_idx_split() - train_idx, val_idx, test_idx = splitted_idx["train"], splitted_idx["valid"], splitted_idx["test"] + train_idx, val_idx, test_idx = ( + splitted_idx["train"], + splitted_idx["valid"], + 
splitted_idx["test"], + ) graph, labels = data[0] n_node_feats = graph.ndata["feat"].shape[1] @@ -113,7 +117,17 @@ def adjust_learning_rate(optimizer, lr, epoch): param_group["lr"] = lr * epoch / 50 -def train(args, model, graph, labels, train_idx, val_idx, test_idx, optimizer, evaluator): +def train( + args, + model, + graph, + labels, + train_idx, + val_idx, + test_idx, + optimizer, + evaluator, +): model.train() feat = graph.ndata["feat"] @@ -138,7 +152,9 @@ def train(args, model, graph, labels, train_idx, val_idx, test_idx, optimizer, e for _ in range(args.n_label_iters): pred = pred.detach() torch.cuda.empty_cache() - feat[unlabel_idx, -n_classes:] = F.softmax(pred[unlabel_idx], dim=-1) + feat[unlabel_idx, -n_classes:] = F.softmax( + pred[unlabel_idx], dim=-1 + ) pred = model(graph, feat) loss = custom_loss_function(pred[train_pred_idx], labels[train_pred_idx]) @@ -149,7 +165,9 @@ def train(args, model, graph, labels, train_idx, val_idx, test_idx, optimizer, e @torch.no_grad() -def evaluate(args, model, graph, labels, train_idx, val_idx, test_idx, evaluator): +def evaluate( + args, model, graph, labels, train_idx, val_idx, test_idx, evaluator +): model.eval() feat = graph.ndata["feat"] @@ -162,7 +180,9 @@ def evaluate(args, model, graph, labels, train_idx, val_idx, test_idx, evaluator if args.n_label_iters > 0: unlabel_idx = torch.cat([val_idx, test_idx]) for _ in range(args.n_label_iters): - feat[unlabel_idx, -n_classes:] = F.softmax(pred[unlabel_idx], dim=-1) + feat[unlabel_idx, -n_classes:] = F.softmax( + pred[unlabel_idx], dim=-1 + ) pred = model(graph, feat) train_loss = custom_loss_function(pred[train_idx], labels[train_idx]) @@ -180,14 +200,18 @@ def evaluate(args, model, graph, labels, train_idx, val_idx, test_idx, evaluator ) -def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running): +def run( + args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running +): evaluator_wrapper = lambda pred, labels: evaluator.eval( {"y_pred": pred.argmax(dim=-1, keepdim=True), "y_true": labels} )["acc"] # define model and optimizer model = gen_model(args).to(device) - optimizer = optim.RMSprop(model.parameters(), lr=args.lr, weight_decay=args.wd) + optimizer = optim.RMSprop( + model.parameters(), lr=args.lr, weight_decay=args.wd + ) # training loop total_time = 0 @@ -202,10 +226,35 @@ def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running) adjust_learning_rate(optimizer, args.lr, epoch) - acc, loss = train(args, model, graph, labels, train_idx, val_idx, test_idx, optimizer, evaluator_wrapper) + acc, loss = train( + args, + model, + graph, + labels, + train_idx, + val_idx, + test_idx, + optimizer, + evaluator_wrapper, + ) - train_acc, val_acc, test_acc, train_loss, val_loss, test_loss, pred = evaluate( - args, model, graph, labels, train_idx, val_idx, test_idx, evaluator_wrapper + ( + train_acc, + val_acc, + test_acc, + train_loss, + val_loss, + test_loss, + pred, + ) = evaluate( + args, + model, + graph, + labels, + train_idx, + val_idx, + test_idx, + evaluator_wrapper, ) toc = time.time() @@ -226,8 +275,26 @@ def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running) ) for l, e in zip( - [accs, train_accs, val_accs, test_accs, losses, train_losses, val_losses, test_losses], - [acc, train_acc, val_acc, test_acc, loss, train_loss, val_loss, test_loss], + [ + accs, + train_accs, + val_accs, + test_accs, + losses, + train_losses, + val_losses, + test_losses, + ], + [ + acc, + train_acc, + val_acc, + test_acc, + 
loss, + train_loss, + val_loss, + test_loss, + ], ): l.append(e) @@ -242,7 +309,10 @@ def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running) ax.set_xticks(np.arange(0, args.n_epochs, 100)) ax.set_yticks(np.linspace(0, 1.0, 101)) ax.tick_params(labeltop=True, labelright=True) - for y, label in zip([accs, train_accs, val_accs, test_accs], ["acc", "train acc", "val acc", "test acc"]): + for y, label in zip( + [accs, train_accs, val_accs, test_accs], + ["acc", "train acc", "val acc", "test acc"], + ): plt.plot(range(args.n_epochs), y, label=label, linewidth=1) ax.xaxis.set_major_locator(MultipleLocator(100)) ax.xaxis.set_minor_locator(AutoMinorLocator(1)) @@ -259,7 +329,8 @@ def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running) ax.set_xticks(np.arange(0, args.n_epochs, 100)) ax.tick_params(labeltop=True, labelright=True) for y, label in zip( - [losses, train_losses, val_losses, test_losses], ["loss", "train loss", "val loss", "test loss"] + [losses, train_losses, val_losses, test_losses], + ["loss", "train loss", "val loss", "test loss"], ): plt.plot(range(args.n_epochs), y, label=label, linewidth=1) ax.xaxis.set_major_locator(MultipleLocator(100)) @@ -288,36 +359,84 @@ def main(): global device, n_node_feats, n_classes, epsilon argparser = argparse.ArgumentParser( - "GAT implementation on ogbn-arxiv", formatter_class=argparse.ArgumentDefaultsHelpFormatter + "GAT implementation on ogbn-arxiv", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + argparser.add_argument( + "--cpu", + action="store_true", + help="CPU mode. This option overrides --gpu.", ) - argparser.add_argument("--cpu", action="store_true", help="CPU mode. This option overrides --gpu.") argparser.add_argument("--gpu", type=int, default=0, help="GPU device ID.") argparser.add_argument("--seed", type=int, default=0, help="seed") - argparser.add_argument("--n-runs", type=int, default=10, help="running times") - argparser.add_argument("--n-epochs", type=int, default=2000, help="number of epochs") argparser.add_argument( - "--use-labels", action="store_true", help="Use labels in the training set as input features." + "--n-runs", type=int, default=10, help="running times" + ) + argparser.add_argument( + "--n-epochs", type=int, default=2000, help="number of epochs" + ) + argparser.add_argument( + "--use-labels", + action="store_true", + help="Use labels in the training set as input features.", + ) + argparser.add_argument( + "--n-label-iters", + type=int, + default=0, + help="number of label iterations", + ) + argparser.add_argument( + "--mask-rate", type=float, default=0.5, help="mask rate" + ) + argparser.add_argument( + "--no-attn-dst", action="store_true", help="Don't use attn_dst." 
+ ) + argparser.add_argument( + "--use-norm", + action="store_true", + help="Use symmetrically normalized adjacency matrix.", + ) + argparser.add_argument( + "--lr", type=float, default=0.002, help="learning rate" + ) + argparser.add_argument( + "--n-layers", type=int, default=3, help="number of layers" + ) + argparser.add_argument( + "--n-heads", type=int, default=3, help="number of heads" + ) + argparser.add_argument( + "--n-hidden", type=int, default=250, help="number of hidden units" + ) + argparser.add_argument( + "--dropout", type=float, default=0.75, help="dropout rate" + ) + argparser.add_argument( + "--input-drop", type=float, default=0.1, help="input drop rate" + ) + argparser.add_argument( + "--attn-drop", type=float, default=0.0, help="attention drop rate" + ) + argparser.add_argument( + "--edge-drop", type=float, default=0.0, help="edge drop rate" ) - argparser.add_argument("--n-label-iters", type=int, default=0, help="number of label iterations") - argparser.add_argument("--mask-rate", type=float, default=0.5, help="mask rate") - argparser.add_argument("--no-attn-dst", action="store_true", help="Don't use attn_dst.") - argparser.add_argument("--use-norm", action="store_true", help="Use symmetrically normalized adjacency matrix.") - argparser.add_argument("--lr", type=float, default=0.002, help="learning rate") - argparser.add_argument("--n-layers", type=int, default=3, help="number of layers") - argparser.add_argument("--n-heads", type=int, default=3, help="number of heads") - argparser.add_argument("--n-hidden", type=int, default=250, help="number of hidden units") - argparser.add_argument("--dropout", type=float, default=0.75, help="dropout rate") - argparser.add_argument("--input-drop", type=float, default=0.1, help="input drop rate") - argparser.add_argument("--attn-drop", type=float, default=0.0, help="attention drop rate") - argparser.add_argument("--edge-drop", type=float, default=0.0, help="edge drop rate") argparser.add_argument("--wd", type=float, default=0, help="weight decay") - argparser.add_argument("--log-every", type=int, default=20, help="log every LOG_EVERY epochs") - argparser.add_argument("--plot-curves", action="store_true", help="plot learning curves") - argparser.add_argument("--save-pred", action="store_true", help="save final predictions") + argparser.add_argument( + "--log-every", type=int, default=20, help="log every LOG_EVERY epochs" + ) + argparser.add_argument( + "--plot-curves", action="store_true", help="plot learning curves" + ) + argparser.add_argument( + "--save-pred", action="store_true", help="save final predictions" + ) args = argparser.parse_args() if not args.use_labels and args.n_label_iters > 0: - raise ValueError("'--use-labels' must be enabled when n_label_iters > 0") + raise ValueError( + "'--use-labels' must be enabled when n_label_iters > 0" + ) if args.cpu: device = torch.device("cpu") @@ -337,7 +456,9 @@ def main(): for i in range(args.n_runs): seed(args.seed + i) - val_acc, test_acc = run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, i + 1) + val_acc, test_acc = run( + args, graph, labels, train_idx, val_idx, test_idx, evaluator, i + 1 + ) val_accs.append(val_acc) test_accs.append(test_acc) diff --git a/examples/pytorch/ogb/ogbn-arxiv/gcn.py b/examples/pytorch/ogb/ogbn-arxiv/gcn.py index adc9eade5bb3..97f71aaddf38 100644 --- a/examples/pytorch/ogb/ogbn-arxiv/gcn.py +++ b/examples/pytorch/ogb/ogbn-arxiv/gcn.py @@ -11,9 +11,8 @@ import torch.optim as optim from matplotlib import pyplot as plt from 
matplotlib.ticker import AutoMinorLocator, MultipleLocator -from ogb.nodeproppred import DglNodePropPredDataset, Evaluator - from models import GCN +from ogb.nodeproppred import DglNodePropPredDataset, Evaluator device = None in_feats, n_classes = None, None @@ -23,10 +22,24 @@ def gen_model(args): if args.use_labels: model = GCN( - in_feats + n_classes, args.n_hidden, n_classes, args.n_layers, F.relu, args.dropout, args.use_linear + in_feats + n_classes, + args.n_hidden, + n_classes, + args.n_layers, + F.relu, + args.dropout, + args.use_linear, ) else: - model = GCN(in_feats, args.n_hidden, n_classes, args.n_layers, F.relu, args.dropout, args.use_linear) + model = GCN( + in_feats, + args.n_hidden, + n_classes, + args.n_layers, + F.relu, + args.dropout, + args.use_linear, + ) return model @@ -37,7 +50,9 @@ def cross_entropy(x, labels): def compute_acc(pred, labels, evaluator): - return evaluator.eval({"y_pred": pred.argmax(dim=-1, keepdim=True), "y_true": labels})["acc"] + return evaluator.eval( + {"y_pred": pred.argmax(dim=-1, keepdim=True), "y_true": labels} + )["acc"] def add_labels(feat, labels, idx): @@ -81,7 +96,9 @@ def train(model, graph, labels, train_idx, optimizer, use_labels): @th.no_grad() -def evaluate(model, graph, labels, train_idx, val_idx, test_idx, use_labels, evaluator): +def evaluate( + model, graph, labels, train_idx, val_idx, test_idx, use_labels, evaluator +): model.eval() feat = graph.ndata["feat"] @@ -104,14 +121,23 @@ def evaluate(model, graph, labels, train_idx, val_idx, test_idx, use_labels, eva ) -def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running): +def run( + args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running +): # define model and optimizer model = gen_model(args) model = model.to(device) - optimizer = optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.wd) + optimizer = optim.AdamW( + model.parameters(), lr=args.lr, weight_decay=args.wd + ) lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau( - optimizer, mode="min", factor=0.5, patience=100, verbose=True, min_lr=1e-3 + optimizer, + mode="min", + factor=0.5, + patience=100, + verbose=True, + min_lr=1e-3, ) # training loop @@ -126,11 +152,27 @@ def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running) adjust_learning_rate(optimizer, args.lr, epoch) - loss, pred = train(model, graph, labels, train_idx, optimizer, args.use_labels) + loss, pred = train( + model, graph, labels, train_idx, optimizer, args.use_labels + ) acc = compute_acc(pred[train_idx], labels[train_idx], evaluator) - train_acc, val_acc, test_acc, train_loss, val_loss, test_loss = evaluate( - model, graph, labels, train_idx, val_idx, test_idx, args.use_labels, evaluator + ( + train_acc, + val_acc, + test_acc, + train_loss, + val_loss, + test_loss, + ) = evaluate( + model, + graph, + labels, + train_idx, + val_idx, + test_idx, + args.use_labels, + evaluator, ) lr_scheduler.step(loss) @@ -152,8 +194,26 @@ def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running) ) for l, e in zip( - [accs, train_accs, val_accs, test_accs, losses, train_losses, val_losses, test_losses], - [acc, train_acc, val_acc, test_acc, loss, train_loss, val_loss, test_loss], + [ + accs, + train_accs, + val_accs, + test_accs, + losses, + train_losses, + val_losses, + test_losses, + ], + [ + acc, + train_acc, + val_acc, + test_acc, + loss, + train_loss, + val_loss, + test_loss, + ], ): l.append(e) @@ -167,7 +227,10 @@ def run(args, graph, labels, train_idx, val_idx, 
test_idx, evaluator, n_running) ax.set_xticks(np.arange(0, args.n_epochs, 100)) ax.set_yticks(np.linspace(0, 1.0, 101)) ax.tick_params(labeltop=True, labelright=True) - for y, label in zip([accs, train_accs, val_accs, test_accs], ["acc", "train acc", "val acc", "test acc"]): + for y, label in zip( + [accs, train_accs, val_accs, test_accs], + ["acc", "train acc", "val acc", "test acc"], + ): plt.plot(range(args.n_epochs), y, label=label) ax.xaxis.set_major_locator(MultipleLocator(100)) ax.xaxis.set_minor_locator(AutoMinorLocator(1)) @@ -184,7 +247,8 @@ def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running) ax.set_xticks(np.arange(0, args.n_epochs, 100)) ax.tick_params(labeltop=True, labelright=True) for y, label in zip( - [losses, train_losses, val_losses, test_losses], ["loss", "train loss", "val loss", "test loss"] + [losses, train_losses, val_losses, test_losses], + ["loss", "train loss", "val loss", "test loss"], ): plt.plot(range(args.n_epochs), y, label=label) ax.xaxis.set_major_locator(MultipleLocator(100)) @@ -202,28 +266,57 @@ def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running) def count_parameters(args): model = gen_model(args) - return sum([np.prod(p.size()) for p in model.parameters() if p.requires_grad]) + return sum( + [np.prod(p.size()) for p in model.parameters() if p.requires_grad] + ) def main(): global device, in_feats, n_classes - argparser = argparse.ArgumentParser("GCN on OGBN-Arxiv", formatter_class=argparse.ArgumentDefaultsHelpFormatter) - argparser.add_argument("--cpu", action="store_true", help="CPU mode. This option overrides --gpu.") + argparser = argparse.ArgumentParser( + "GCN on OGBN-Arxiv", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + argparser.add_argument( + "--cpu", + action="store_true", + help="CPU mode. This option overrides --gpu.", + ) argparser.add_argument("--gpu", type=int, default=0, help="GPU device ID.") - argparser.add_argument("--n-runs", type=int, default=10, help="running times") - argparser.add_argument("--n-epochs", type=int, default=1000, help="number of epochs") argparser.add_argument( - "--use-labels", action="store_true", help="Use labels in the training set as input features." + "--n-runs", type=int, default=10, help="running times" + ) + argparser.add_argument( + "--n-epochs", type=int, default=1000, help="number of epochs" + ) + argparser.add_argument( + "--use-labels", + action="store_true", + help="Use labels in the training set as input features.", + ) + argparser.add_argument( + "--use-linear", action="store_true", help="Use linear layer." 
+ ) + argparser.add_argument( + "--lr", type=float, default=0.005, help="learning rate" + ) + argparser.add_argument( + "--n-layers", type=int, default=3, help="number of layers" + ) + argparser.add_argument( + "--n-hidden", type=int, default=256, help="number of hidden units" + ) + argparser.add_argument( + "--dropout", type=float, default=0.5, help="dropout rate" ) - argparser.add_argument("--use-linear", action="store_true", help="Use linear layer.") - argparser.add_argument("--lr", type=float, default=0.005, help="learning rate") - argparser.add_argument("--n-layers", type=int, default=3, help="number of layers") - argparser.add_argument("--n-hidden", type=int, default=256, help="number of hidden units") - argparser.add_argument("--dropout", type=float, default=0.5, help="dropout rate") argparser.add_argument("--wd", type=float, default=0, help="weight decay") - argparser.add_argument("--log-every", type=int, default=20, help="log every LOG_EVERY epochs") - argparser.add_argument("--plot-curves", action="store_true", help="plot learning curves") + argparser.add_argument( + "--log-every", type=int, default=20, help="log every LOG_EVERY epochs" + ) + argparser.add_argument( + "--plot-curves", action="store_true", help="plot learning curves" + ) args = argparser.parse_args() if args.cpu: @@ -236,7 +329,11 @@ def main(): evaluator = Evaluator(name="ogbn-arxiv") splitted_idx = data.get_idx_split() - train_idx, val_idx, test_idx = splitted_idx["train"], splitted_idx["valid"], splitted_idx["test"] + train_idx, val_idx, test_idx = ( + splitted_idx["train"], + splitted_idx["valid"], + splitted_idx["test"], + ) graph, labels = data[0] # add reverse edges @@ -263,7 +360,9 @@ def main(): test_accs = [] for i in range(args.n_runs): - val_acc, test_acc = run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, i) + val_acc, test_acc = run( + args, graph, labels, train_idx, val_idx, test_idx, evaluator, i + ) val_accs.append(val_acc) test_accs.append(test_acc) diff --git a/examples/pytorch/ogb/ogbn-arxiv/models.py b/examples/pytorch/ogb/ogbn-arxiv/models.py index f402c548e9a7..17b6525816f2 100644 --- a/examples/pytorch/ogb/ogbn-arxiv/models.py +++ b/examples/pytorch/ogb/ogbn-arxiv/models.py @@ -1,6 +1,7 @@ -import dgl.nn.pytorch as dglnn import torch import torch.nn as nn + +import dgl.nn.pytorch as dglnn from dgl import function as fn from dgl.ops import edge_softmax from dgl.utils import expand_as_pair @@ -42,7 +43,16 @@ def forward(self, x): class GCN(nn.Module): - def __init__(self, in_feats, n_hidden, n_classes, n_layers, activation, dropout, use_linear): + def __init__( + self, + in_feats, + n_hidden, + n_classes, + n_layers, + activation, + dropout, + use_linear, + ): super().__init__() self.n_layers = n_layers self.n_hidden = n_hidden @@ -59,7 +69,9 @@ def __init__(self, in_feats, n_hidden, n_classes, n_layers, activation, dropout, out_hidden = n_hidden if i < n_layers - 1 else n_classes bias = i == n_layers - 1 - self.convs.append(dglnn.GraphConv(in_hidden, out_hidden, "both", bias=bias)) + self.convs.append( + dglnn.GraphConv(in_hidden, out_hidden, "both", bias=bias) + ) if use_linear: self.linear.append(nn.Linear(in_hidden, out_hidden, bias=False)) if i < n_layers - 1: @@ -113,13 +125,23 @@ def __init__( self._allow_zero_in_degree = allow_zero_in_degree self._use_symmetric_norm = use_symmetric_norm if isinstance(in_feats, tuple): - self.fc_src = nn.Linear(self._in_src_feats, out_feats * num_heads, bias=False) - self.fc_dst = nn.Linear(self._in_dst_feats, out_feats * num_heads, 
bias=False) + self.fc_src = nn.Linear( + self._in_src_feats, out_feats * num_heads, bias=False + ) + self.fc_dst = nn.Linear( + self._in_dst_feats, out_feats * num_heads, bias=False + ) else: - self.fc = nn.Linear(self._in_src_feats, out_feats * num_heads, bias=False) - self.attn_l = nn.Parameter(torch.FloatTensor(size=(1, num_heads, out_feats))) + self.fc = nn.Linear( + self._in_src_feats, out_feats * num_heads, bias=False + ) + self.attn_l = nn.Parameter( + torch.FloatTensor(size=(1, num_heads, out_feats)) + ) if use_attn_dst: - self.attn_r = nn.Parameter(torch.FloatTensor(size=(1, num_heads, out_feats))) + self.attn_r = nn.Parameter( + torch.FloatTensor(size=(1, num_heads, out_feats)) + ) else: self.register_buffer("attn_r", None) self.feat_drop = nn.Dropout(feat_drop) @@ -127,7 +149,9 @@ def __init__( self.edge_drop = edge_drop self.leaky_relu = nn.LeakyReLU(negative_slope) if residual: - self.res_fc = nn.Linear(self._in_dst_feats, num_heads * out_feats, bias=False) + self.res_fc = nn.Linear( + self._in_dst_feats, num_heads * out_feats, bias=False + ) else: self.register_buffer("res_fc", None) self.reset_parameters() @@ -161,12 +185,18 @@ def forward(self, graph, feat): if not hasattr(self, "fc_src"): self.fc_src, self.fc_dst = self.fc, self.fc feat_src, feat_dst = h_src, h_dst - feat_src = self.fc_src(h_src).view(-1, self._num_heads, self._out_feats) - feat_dst = self.fc_dst(h_dst).view(-1, self._num_heads, self._out_feats) + feat_src = self.fc_src(h_src).view( + -1, self._num_heads, self._out_feats + ) + feat_dst = self.fc_dst(h_dst).view( + -1, self._num_heads, self._out_feats + ) else: h_src = self.feat_drop(feat) feat_src = h_src - feat_src = self.fc(h_src).view(-1, self._num_heads, self._out_feats) + feat_src = self.fc(h_src).view( + -1, self._num_heads, self._out_feats + ) if graph.is_block: h_dst = h_src[: graph.number_of_dst_nodes()] feat_dst = feat_src[: graph.number_of_dst_nodes()] @@ -207,7 +237,9 @@ def forward(self, graph, feat): bound = int(graph.number_of_edges() * self.edge_drop) eids = perm[bound:] graph.edata["a"] = torch.zeros_like(e) - graph.edata["a"][eids] = self.attn_drop(edge_softmax(graph, e[eids], eids=eids)) + graph.edata["a"][eids] = self.attn_drop( + edge_softmax(graph, e[eids], eids=eids) + ) else: graph.edata["a"] = self.attn_drop(edge_softmax(graph, e)) @@ -224,7 +256,9 @@ def forward(self, graph, feat): # residual if self.res_fc is not None: - resval = self.res_fc(h_dst).view(h_dst.shape[0], -1, self._out_feats) + resval = self.res_fc(h_dst).view( + h_dst.shape[0], -1, self._out_feats + ) rst = rst + resval # activation @@ -282,7 +316,9 @@ def __init__( if i < n_layers - 1: self.norms.append(nn.BatchNorm1d(out_channels * out_hidden)) - self.bias_last = ElementWiseLinear(n_classes, weight=False, bias=True, inplace=True) + self.bias_last = ElementWiseLinear( + n_classes, weight=False, bias=True, inplace=True + ) self.input_drop = nn.Dropout(input_drop) self.dropout = nn.Dropout(dropout) diff --git a/examples/pytorch/ogb/ogbn-mag/hetero_rgcn.py b/examples/pytorch/ogb/ogbn-mag/hetero_rgcn.py index fbfd845fa5f6..4efd1195077a 100644 --- a/examples/pytorch/ogb/ogbn-mag/hetero_rgcn.py +++ b/examples/pytorch/ogb/ogbn-mag/hetero_rgcn.py @@ -1,22 +1,24 @@ import argparse import itertools -from tqdm import tqdm -import dgl -import dgl.nn as dglnn -from dgl.nn import HeteroEmbedding -from dgl import Compose, AddReverse, ToSimple import torch as th import torch.nn as nn import torch.nn.functional as F from ogb.nodeproppred import DglNodePropPredDataset, Evaluator 
+from tqdm import tqdm + +import dgl +import dgl.nn as dglnn +from dgl import AddReverse, Compose, ToSimple +from dgl.nn import HeteroEmbedding + def prepare_data(args): dataset = DglNodePropPredDataset(name="ogbn-mag") split_idx = dataset.get_idx_split() # graph: dgl graph object, label: torch tensor of shape (num_nodes, num_tasks) g, labels = dataset[0] - labels = labels['paper'].flatten() + labels = labels["paper"].flatten() transform = Compose([ToSimple(), AddReverse()]) g = transform(g) @@ -28,34 +30,38 @@ def prepare_data(args): # train sampler sampler = dgl.dataloading.MultiLayerNeighborSampler([25, 20]) train_loader = dgl.dataloading.DataLoader( - g, split_idx['train'], sampler, - batch_size=1024, shuffle=True, num_workers=0) + g, + split_idx["train"], + sampler, + batch_size=1024, + shuffle=True, + num_workers=0, + ) return g, labels, dataset.num_classes, split_idx, logger, train_loader + def extract_embed(node_embed, input_nodes): - emb = node_embed({ - ntype: input_nodes[ntype] for ntype in input_nodes if ntype != 'paper' - }) + emb = node_embed( + {ntype: input_nodes[ntype] for ntype in input_nodes if ntype != "paper"} + ) return emb + def rel_graph_embed(graph, embed_size): node_num = {} for ntype in graph.ntypes: - if ntype == 'paper': + if ntype == "paper": continue node_num[ntype] = graph.num_nodes(ntype) embeds = HeteroEmbedding(node_num, embed_size) return embeds + class RelGraphConvLayer(nn.Module): - def __init__(self, - in_feat, - out_feat, - ntypes, - rel_names, - activation=None, - dropout=0.0): + def __init__( + self, in_feat, out_feat, ntypes, rel_names, activation=None, dropout=0.0 + ): super(RelGraphConvLayer, self).__init__() self.in_feat = in_feat self.out_feat = out_feat @@ -63,21 +69,29 @@ def __init__(self, self.rel_names = rel_names self.activation = activation - self.conv = dglnn.HeteroGraphConv({ - rel : dglnn.GraphConv(in_feat, out_feat, norm='right', weight=False, bias=False) + self.conv = dglnn.HeteroGraphConv( + { + rel: dglnn.GraphConv( + in_feat, out_feat, norm="right", weight=False, bias=False + ) for rel in rel_names - }) + } + ) - self.weight = nn.ModuleDict({ - rel_name: nn.Linear(in_feat, out_feat, bias=False) - for rel_name in self.rel_names - }) + self.weight = nn.ModuleDict( + { + rel_name: nn.Linear(in_feat, out_feat, bias=False) + for rel_name in self.rel_names + } + ) # weight for self loop - self.loop_weights = nn.ModuleDict({ - ntype: nn.Linear(in_feat, out_feat, bias=True) - for ntype in self.ntypes - }) + self.loop_weights = nn.ModuleDict( + { + ntype: nn.Linear(in_feat, out_feat, bias=True) + for ntype in self.ntypes + } + ) self.dropout = nn.Dropout(dropout) self.reset_parameters() @@ -104,10 +118,14 @@ def forward(self, g, inputs): New node features for each node type. 
""" g = g.local_var() - wdict = {rel_name: {'weight': self.weight[rel_name].weight.T} - for rel_name in self.rel_names} + wdict = { + rel_name: {"weight": self.weight[rel_name].weight.T} + for rel_name in self.rel_names + } - inputs_dst = {k: v[:g.number_of_dst_nodes(k)] for k, v in inputs.items()} + inputs_dst = { + k: v[: g.number_of_dst_nodes(k)] for k, v in inputs.items() + } hs = self.conv(g, inputs, mod_kwargs=wdict) @@ -117,7 +135,8 @@ def _apply(ntype, h): h = self.activation(h) return self.dropout(h) - return {ntype : _apply(ntype, h) for ntype, h in hs.items()} + return {ntype: _apply(ntype, h) for ntype, h in hs.items()} + class EntityClassify(nn.Module): def __init__(self, g, in_dim, out_dim): @@ -131,14 +150,27 @@ def __init__(self, g, in_dim, out_dim): self.layers = nn.ModuleList() # i2h - self.layers.append(RelGraphConvLayer( - self.in_dim, self.h_dim, g.ntypes, self.rel_names, - activation=F.relu, dropout=self.dropout)) + self.layers.append( + RelGraphConvLayer( + self.in_dim, + self.h_dim, + g.ntypes, + self.rel_names, + activation=F.relu, + dropout=self.dropout, + ) + ) # h2o - self.layers.append(RelGraphConvLayer( - self.h_dim, self.out_dim, g.ntypes, self.rel_names, - activation=None)) + self.layers.append( + RelGraphConvLayer( + self.h_dim, + self.out_dim, + g.ntypes, + self.rel_names, + activation=None, + ) + ) def reset_parameters(self): for layer in self.layers: @@ -149,6 +181,7 @@ def forward(self, h, blocks): h = layer(block, h) return h + class Logger(object): r""" This class was taken directly from the PyG implementation and can be found @@ -156,6 +189,7 @@ class Logger(object): This was done to ensure that performance was measured in precisely the same way """ + def __init__(self, runs): self.results = [[] for _ in range(runs)] @@ -168,11 +202,11 @@ def print_statistics(self, run=None): if run is not None: result = 100 * th.tensor(self.results[run]) argmax = result[:, 1].argmax().item() - print(f'Run {run + 1:02d}:') - print(f'Highest Train: {result[:, 0].max():.2f}') - print(f'Highest Valid: {result[:, 1].max():.2f}') - print(f' Final Train: {result[argmax, 0]:.2f}') - print(f' Final Test: {result[argmax, 2]:.2f}') + print(f"Run {run + 1:02d}:") + print(f"Highest Train: {result[:, 0].max():.2f}") + print(f"Highest Valid: {result[:, 1].max():.2f}") + print(f" Final Train: {result[argmax, 0]:.2f}") + print(f" Final Test: {result[argmax, 2]:.2f}") else: result = 100 * th.tensor(self.results) @@ -186,39 +220,54 @@ def print_statistics(self, run=None): best_result = th.tensor(best_results) - print(f'All runs:') + print(f"All runs:") r = best_result[:, 0] - print(f'Highest Train: {r.mean():.2f} ± {r.std():.2f}') + print(f"Highest Train: {r.mean():.2f} ± {r.std():.2f}") r = best_result[:, 1] - print(f'Highest Valid: {r.mean():.2f} ± {r.std():.2f}') + print(f"Highest Valid: {r.mean():.2f} ± {r.std():.2f}") r = best_result[:, 2] - print(f' Final Train: {r.mean():.2f} ± {r.std():.2f}') + print(f" Final Train: {r.mean():.2f} ± {r.std():.2f}") r = best_result[:, 3] - print(f' Final Test: {r.mean():.2f} ± {r.std():.2f}') - -def train(g, model, node_embed, optimizer, train_loader, split_idx, - labels, logger, device, run): + print(f" Final Test: {r.mean():.2f} ± {r.std():.2f}") + + +def train( + g, + model, + node_embed, + optimizer, + train_loader, + split_idx, + labels, + logger, + device, + run, +): print("start training...") - category = 'paper' + category = "paper" for epoch in range(3): - num_train = split_idx['train'][category].shape[0] + num_train = 
split_idx["train"][category].shape[0] pbar = tqdm(total=num_train) - pbar.set_description(f'Epoch {epoch:02d}') + pbar.set_description(f"Epoch {epoch:02d}") model.train() total_loss = 0 for input_nodes, seeds, blocks in train_loader: blocks = [blk.to(device) for blk in blocks] - seeds = seeds[category] # we only predict the nodes with type "category" + seeds = seeds[ + category + ] # we only predict the nodes with type "category" batch_size = seeds.shape[0] emb = extract_embed(node_embed, input_nodes) # Add the batch's raw "paper" features - emb.update({'paper': g.ndata['feat']['paper'][input_nodes['paper']]}) + emb.update( + {"paper": g.ndata["feat"]["paper"][input_nodes["paper"]]} + ) - emb = {k : e.to(device) for k, e in emb.items()} + emb = {k: e.to(device) for k, e in emb.items()} lbl = labels[seeds].to(device) optimizer.zero_grad() @@ -238,41 +287,51 @@ def train(g, model, node_embed, optimizer, train_loader, split_idx, result = test(g, model, node_embed, labels, device, split_idx) logger.add_result(run, result) train_acc, valid_acc, test_acc = result - print(f'Run: {run + 1:02d}, ' - f'Epoch: {epoch +1 :02d}, ' - f'Loss: {loss:.4f}, ' - f'Train: {100 * train_acc:.2f}%, ' - f'Valid: {100 * valid_acc:.2f}%, ' - f'Test: {100 * test_acc:.2f}%') + print( + f"Run: {run + 1:02d}, " + f"Epoch: {epoch +1 :02d}, " + f"Loss: {loss:.4f}, " + f"Train: {100 * train_acc:.2f}%, " + f"Valid: {100 * valid_acc:.2f}%, " + f"Test: {100 * test_acc:.2f}%" + ) return logger + @th.no_grad() def test(g, model, node_embed, y_true, device, split_idx): model.eval() - category = 'paper' - evaluator = Evaluator(name='ogbn-mag') + category = "paper" + evaluator = Evaluator(name="ogbn-mag") # 2 GNN layers sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2) loader = dgl.dataloading.DataLoader( - g, {'paper': th.arange(g.num_nodes('paper'))}, sampler, - batch_size=16384, shuffle=False, num_workers=0) + g, + {"paper": th.arange(g.num_nodes("paper"))}, + sampler, + batch_size=16384, + shuffle=False, + num_workers=0, + ) pbar = tqdm(total=y_true.size(0)) - pbar.set_description(f'Inference') + pbar.set_description(f"Inference") y_hats = list() for input_nodes, seeds, blocks in loader: blocks = [blk.to(device) for blk in blocks] - seeds = seeds[category] # we only predict the nodes with type "category" + seeds = seeds[ + category + ] # we only predict the nodes with type "category" batch_size = seeds.shape[0] emb = extract_embed(node_embed, input_nodes) # Get the batch's raw "paper" features - emb.update({'paper': g.ndata['feat']['paper'][input_nodes['paper']]}) - emb = {k : e.to(device) for k, e in emb.items()} + emb.update({"paper": g.ndata["feat"]["paper"][input_nodes["paper"]]}) + emb = {k: e.to(device) for k, e in emb.items()} logits = model(emb, blocks)[category] y_hat = logits.log_softmax(dim=-1).argmax(dim=1, keepdims=True) @@ -285,31 +344,42 @@ def test(g, model, node_embed, y_true, device, split_idx): y_pred = th.cat(y_hats, dim=0) y_true = th.unsqueeze(y_true, 1) - train_acc = evaluator.eval({ - 'y_true': y_true[split_idx['train']['paper']], - 'y_pred': y_pred[split_idx['train']['paper']], - })['acc'] - valid_acc = evaluator.eval({ - 'y_true': y_true[split_idx['valid']['paper']], - 'y_pred': y_pred[split_idx['valid']['paper']], - })['acc'] - test_acc = evaluator.eval({ - 'y_true': y_true[split_idx['test']['paper']], - 'y_pred': y_pred[split_idx['test']['paper']], - })['acc'] + train_acc = evaluator.eval( + { + "y_true": y_true[split_idx["train"]["paper"]], + "y_pred": y_pred[split_idx["train"]["paper"]], 
+ } + )["acc"] + valid_acc = evaluator.eval( + { + "y_true": y_true[split_idx["valid"]["paper"]], + "y_pred": y_pred[split_idx["valid"]["paper"]], + } + )["acc"] + test_acc = evaluator.eval( + { + "y_true": y_true[split_idx["test"]["paper"]], + "y_pred": y_pred[split_idx["test"]["paper"]], + } + )["acc"] return train_acc, valid_acc, test_acc + def main(args): - device = f'cuda:0' if th.cuda.is_available() else 'cpu' + device = f"cuda:0" if th.cuda.is_available() else "cpu" g, labels, num_classes, split_idx, logger, train_loader = prepare_data(args) embed_layer = rel_graph_embed(g, 128) model = EntityClassify(g, 128, num_classes).to(device) - print(f"Number of embedding parameters: {sum(p.numel() for p in embed_layer.parameters())}") - print(f"Number of model parameters: {sum(p.numel() for p in model.parameters())}") + print( + f"Number of embedding parameters: {sum(p.numel() for p in embed_layer.parameters())}" + ) + print( + f"Number of model parameters: {sum(p.numel() for p in model.parameters())}" + ) for run in range(args.runs): @@ -317,19 +387,32 @@ def main(args): model.reset_parameters() # optimizer - all_params = itertools.chain(model.parameters(), embed_layer.parameters()) + all_params = itertools.chain( + model.parameters(), embed_layer.parameters() + ) optimizer = th.optim.Adam(all_params, lr=0.01) - logger = train(g, model, embed_layer, optimizer, train_loader, split_idx, - labels, logger, device, run) + logger = train( + g, + model, + embed_layer, + optimizer, + train_loader, + split_idx, + labels, + logger, + device, + run, + ) logger.print_statistics(run) print("Final performance: ") logger.print_statistics() -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='RGCN') - parser.add_argument('--runs', type=int, default=10) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="RGCN") + parser.add_argument("--runs", type=int, default=10) args = parser.parse_args() diff --git a/examples/pytorch/ogb/ogbn-products/gat/gat.py b/examples/pytorch/ogb/ogbn-products/gat/gat.py index 2eb7deac76cf..e0690bb26549 100755 --- a/examples/pytorch/ogb/ogbn-products/gat/gat.py +++ b/examples/pytorch/ogb/ogbn-products/gat/gat.py @@ -7,21 +7,24 @@ import time from collections import OrderedDict -import dgl -import dgl.function as fn import matplotlib.pyplot as plt import numpy as np import torch import torch.nn.functional as F import torch.optim as optim -from dgl.dataloading import MultiLayerFullNeighborSampler, MultiLayerNeighborSampler -from dgl.dataloading import DataLoader from matplotlib.ticker import AutoMinorLocator, MultipleLocator +from models import GAT from ogb.nodeproppred import DglNodePropPredDataset, Evaluator from torch import nn from tqdm import tqdm -from models import GAT +import dgl +import dgl.function as fn +from dgl.dataloading import ( + DataLoader, + MultiLayerFullNeighborSampler, + MultiLayerNeighborSampler, +) epsilon = 1 - math.log(2) @@ -46,7 +49,11 @@ def load_data(dataset): evaluator = Evaluator(name=dataset) splitted_idx = data.get_idx_split() - train_idx, val_idx, test_idx = splitted_idx["train"], splitted_idx["valid"], splitted_idx["test"] + train_idx, val_idx, test_idx = ( + splitted_idx["train"], + splitted_idx["valid"], + splitted_idx["test"], + ) graph, labels = data[0] graph.ndata["labels"] = labels @@ -61,10 +68,14 @@ def preprocess(graph, labels, train_idx): # graph = graph.remove_self_loop().add_self_loop() n_node_feats = graph.ndata["feat"].shape[-1] - graph.ndata["train_labels_onehot"] = 
torch.zeros(graph.number_of_nodes(), n_classes) + graph.ndata["train_labels_onehot"] = torch.zeros( + graph.number_of_nodes(), n_classes + ) graph.ndata["train_labels_onehot"][train_idx, labels[train_idx, 0]] = 1 - graph.ndata["is_train"] = torch.zeros(graph.number_of_nodes(), dtype=torch.bool) + graph.ndata["is_train"] = torch.zeros( + graph.number_of_nodes(), dtype=torch.bool + ) graph.ndata["is_train"][train_idx] = 1 graph.create_formats_() @@ -112,12 +123,18 @@ def add_soft_labels(graph, soft_labels): def update_hard_labels(graph, idx=None): if idx is None: - idx = torch.arange(graph.srcdata["is_train"].shape[0])[graph.srcdata["is_train"]] + idx = torch.arange(graph.srcdata["is_train"].shape[0])[ + graph.srcdata["is_train"] + ] - graph.srcdata["feat"][idx, -n_classes:] = graph.srcdata["train_labels_onehot"][idx] + graph.srcdata["feat"][idx, -n_classes:] = graph.srcdata[ + "train_labels_onehot" + ][idx] -def train(args, model, dataloader, labels, train_idx, criterion, optimizer, evaluator): +def train( + args, model, dataloader, labels, train_idx, criterion, optimizer, evaluator +): model.train() loss_sum, total = 0, 0 @@ -133,10 +150,18 @@ def train(args, model, dataloader, labels, train_idx, criterion, optimizer, eval if args.use_labels: mask = torch.rand(new_train_idx.shape) < args.mask_rate - train_labels_idx = torch.cat([new_train_idx[~mask], torch.arange(len(output_nodes), len(input_nodes))]) + train_labels_idx = torch.cat( + [ + new_train_idx[~mask], + torch.arange(len(output_nodes), len(input_nodes)), + ] + ) train_pred_idx = new_train_idx[mask] - add_soft_labels(subgraphs[0], F.softmax(preds_old[input_nodes].to(device), dim=-1)) + add_soft_labels( + subgraphs[0], + F.softmax(preds_old[input_nodes].to(device), dim=-1), + ) update_hard_labels(subgraphs[0], train_labels_idx) else: train_pred_idx = new_train_idx @@ -148,7 +173,10 @@ def train(args, model, dataloader, labels, train_idx, criterion, optimizer, eval # NOTE: This is not a complete implementation of label reuse, since it is too expensive # to predict the nodes in validation and test set during training time. if it == args.n_label_iters: - loss = criterion(pred[train_pred_idx], subgraphs[-1].dstdata["labels"][train_pred_idx]) + loss = criterion( + pred[train_pred_idx], + subgraphs[-1].dstdata["labels"][train_pred_idx], + ) optimizer.zero_grad() loss.backward() optimizer.step() @@ -166,7 +194,17 @@ def train(args, model, dataloader, labels, train_idx, criterion, optimizer, eval @torch.no_grad() -def evaluate(args, model, dataloader, labels, train_idx, val_idx, test_idx, criterion, evaluator): +def evaluate( + args, + model, + dataloader, + labels, + train_idx, + val_idx, + test_idx, + criterion, + evaluator, +): model.eval() # Due to the limitation of memory capacity, we calculate the average of logits 'eval_times' times. 
@@ -182,7 +220,10 @@ def evaluate(args, model, dataloader, labels, train_idx, val_idx, test_idx, crit subgraphs = [b.to(device) for b in subgraphs] if args.use_labels: - add_soft_labels(subgraphs[0], F.softmax(preds_old[input_nodes].to(device), dim=-1)) + add_soft_labels( + subgraphs[0], + F.softmax(preds_old[input_nodes].to(device), dim=-1), + ) update_hard_labels(subgraphs[0]) pred = model(subgraphs, inference=True) @@ -209,7 +250,9 @@ def evaluate(args, model, dataloader, labels, train_idx, val_idx, test_idx, crit ) -def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running): +def run( + args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running +): evaluator_wrapper = lambda pred, labels: evaluator.eval( {"y_pred": pred.argmax(dim=-1, keepdim=True), "y_true": labels} )["acc"] @@ -217,37 +260,52 @@ def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running) n_train_samples = train_idx.shape[0] train_batch_size = (n_train_samples + 29) // 30 - train_sampler = MultiLayerNeighborSampler([10 for _ in range(args.n_layers)]) - train_dataloader = DataLoader( + train_sampler = MultiLayerNeighborSampler( + [10 for _ in range(args.n_layers)] + ) + train_dataloader = DataLoader( graph.cpu(), train_idx.cpu(), train_sampler, - batch_size=train_batch_size, shuffle=True, - num_workers=4, + batch_size=train_batch_size, + shuffle=True, + num_workers=4, ) eval_batch_size = 32768 eval_sampler = MultiLayerNeighborSampler([15 for _ in range(args.n_layers)]) if args.estimation_mode: - test_idx_during_training = test_idx[torch.arange(start=0, end=len(test_idx), step=45)] + test_idx_during_training = test_idx[ + torch.arange(start=0, end=len(test_idx), step=45) + ] else: test_idx_during_training = test_idx - eval_idx = torch.cat([train_idx.cpu(), val_idx.cpu(), test_idx_during_training.cpu()]) - eval_dataloader = DataLoader( + eval_idx = torch.cat( + [train_idx.cpu(), val_idx.cpu(), test_idx_during_training.cpu()] + ) + eval_dataloader = DataLoader( graph.cpu(), eval_idx, eval_sampler, - batch_size=eval_batch_size, shuffle=False, - num_workers=4, + batch_size=eval_batch_size, + shuffle=False, + num_workers=4, ) model = gen_model(args).to(device) - optimizer = optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.wd) + optimizer = optim.AdamW( + model.parameters(), lr=args.lr, weight_decay=args.wd + ) lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau( - optimizer, mode="max", factor=0.7, patience=20, verbose=True, min_lr=1e-4 + optimizer, + mode="max", + factor=0.7, + patience=20, + verbose=True, + min_lr=1e-4, ) best_model_state_dict = None @@ -261,13 +319,33 @@ def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running) for epoch in range(1, args.n_epochs + 1): tic = time.time() - score, loss = train(args, model, train_dataloader, labels, train_idx, criterion, optimizer, evaluator_wrapper) + score, loss = train( + args, + model, + train_dataloader, + labels, + train_idx, + criterion, + optimizer, + evaluator_wrapper, + ) toc = time.time() total_time += toc - tic - if epoch == args.n_epochs or epoch % args.eval_every == 0 or epoch % args.log_every == 0: - train_score, val_score, test_score, train_loss, val_loss, test_loss = evaluate( + if ( + epoch == args.n_epochs + or epoch % args.eval_every == 0 + or epoch % args.log_every == 0 + ): + ( + train_score, + val_score, + test_score, + train_loss, + val_loss, + test_loss, + ) = evaluate( args, model, eval_dataloader, @@ -283,7 +361,9 @@ def run(args, graph, labels, train_idx, 
val_idx, test_idx, evaluator, n_running) best_val_score = val_score final_test_score = test_score if args.estimation_mode: - best_model_state_dict = {k: v.to("cpu") for k, v in model.state_dict().items()} + best_model_state_dict = { + k: v.to("cpu") for k, v in model.state_dict().items() + } if epoch == args.n_epochs or epoch % args.log_every == 0: print( @@ -294,8 +374,26 @@ def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running) ) for l, e in zip( - [scores, train_scores, val_scores, test_scores, losses, train_losses, val_losses, test_losses], - [score, train_score, val_score, test_score, loss, train_loss, val_loss, test_loss], + [ + scores, + train_scores, + val_scores, + test_scores, + losses, + train_losses, + val_losses, + test_losses, + ], + [ + score, + train_score, + val_score, + test_score, + loss, + train_loss, + val_loss, + test_loss, + ], ): l.append(e) @@ -303,19 +401,30 @@ def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running) if args.estimation_mode: model.load_state_dict(best_model_state_dict) - eval_dataloader = DataLoader( + eval_dataloader = DataLoader( graph.cpu(), test_idx.cpu(), eval_sampler, - batch_size=eval_batch_size, shuffle=False, - num_workers=4, + batch_size=eval_batch_size, + shuffle=False, + num_workers=4, ) final_test_score = evaluate( - args, model, eval_dataloader, labels, train_idx, val_idx, test_idx, criterion, evaluator_wrapper + args, + model, + eval_dataloader, + labels, + train_idx, + val_idx, + test_idx, + criterion, + evaluator_wrapper, )[2] print("*" * 50) - print(f"Best val score: {best_val_score}, Final test score: {final_test_score}") + print( + f"Best val score: {best_val_score}, Final test score: {final_test_score}" + ) print("*" * 50) if args.plot_curves: @@ -324,8 +433,16 @@ def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running) ax.set_xticks(np.arange(0, args.n_epochs, 100)) ax.set_yticks(np.linspace(0, 1.0, 101)) ax.tick_params(labeltop=True, labelright=True) - for y, label in zip([train_scores, val_scores, test_scores], ["train score", "val score", "test score"]): - plt.plot(range(1, args.n_epochs + 1, args.log_every), y, label=label, linewidth=1) + for y, label in zip( + [train_scores, val_scores, test_scores], + ["train score", "val score", "test score"], + ): + plt.plot( + range(1, args.n_epochs + 1, args.log_every), + y, + label=label, + linewidth=1, + ) ax.xaxis.set_major_locator(MultipleLocator(10)) ax.xaxis.set_minor_locator(AutoMinorLocator(1)) ax.yaxis.set_major_locator(MultipleLocator(0.01)) @@ -341,9 +458,15 @@ def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running) ax.set_xticks(np.arange(0, args.n_epochs, 100)) ax.tick_params(labeltop=True, labelright=True) for y, label in zip( - [losses, train_losses, val_losses, test_losses], ["loss", "train loss", "val loss", "test loss"] + [losses, train_losses, val_losses, test_losses], + ["loss", "train loss", "val loss", "test loss"], ): - plt.plot(range(1, args.n_epochs + 1, args.log_every), y, label=label, linewidth=1) + plt.plot( + range(1, args.n_epochs + 1, args.log_every), + y, + label=label, + linewidth=1, + ) ax.xaxis.set_major_locator(MultipleLocator(10)) ax.xaxis.set_minor_locator(AutoMinorLocator(1)) ax.yaxis.set_major_locator(MultipleLocator(0.1)) @@ -359,41 +482,87 @@ def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running) def count_parameters(args): model = gen_model(args) - return sum([np.prod(p.size()) for p in model.parameters() if 
p.requires_grad]) + return sum( + [np.prod(p.size()) for p in model.parameters() if p.requires_grad] + ) def main(): global device argparser = argparse.ArgumentParser( - "GAT implementation on ogbn-products", formatter_class=argparse.ArgumentDefaultsHelpFormatter + "GAT implementation on ogbn-products", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + argparser.add_argument( + "--cpu", + action="store_true", + help="CPU mode. This option overrides '--gpu'.", ) - argparser.add_argument("--cpu", action="store_true", help="CPU mode. This option overrides '--gpu'.") argparser.add_argument("--gpu", type=int, default=0, help="GPU device ID") argparser.add_argument("--seed", type=int, default=0, help="seed") - argparser.add_argument("--n-runs", type=int, default=10, help="running times") - argparser.add_argument("--n-epochs", type=int, default=250, help="number of epochs") argparser.add_argument( - "--use-labels", action="store_true", help="Use labels in the training set as input features." - ) - argparser.add_argument("--n-label-iters", type=int, default=0, help="number of label iterations") - argparser.add_argument("--no-attn-dst", action="store_true", help="Don't use attn_dst.") - argparser.add_argument("--mask-rate", type=float, default=0.5, help="mask rate") - argparser.add_argument("--n-heads", type=int, default=4, help="number of heads") - argparser.add_argument("--lr", type=float, default=0.01, help="learning rate") - argparser.add_argument("--n-layers", type=int, default=3, help="number of layers") - argparser.add_argument("--n-hidden", type=int, default=120, help="number of hidden units") - argparser.add_argument("--dropout", type=float, default=0.5, help="dropout rate") - argparser.add_argument("--input-drop", type=float, default=0.1, help="input drop rate") - argparser.add_argument("--attn-dropout", type=float, default=0.0, help="attention drop rate") - argparser.add_argument("--edge-drop", type=float, default=0.1, help="edge drop rate") + "--n-runs", type=int, default=10, help="running times" + ) + argparser.add_argument( + "--n-epochs", type=int, default=250, help="number of epochs" + ) + argparser.add_argument( + "--use-labels", + action="store_true", + help="Use labels in the training set as input features.", + ) + argparser.add_argument( + "--n-label-iters", + type=int, + default=0, + help="number of label iterations", + ) + argparser.add_argument( + "--no-attn-dst", action="store_true", help="Don't use attn_dst." 
+ ) + argparser.add_argument( + "--mask-rate", type=float, default=0.5, help="mask rate" + ) + argparser.add_argument( + "--n-heads", type=int, default=4, help="number of heads" + ) + argparser.add_argument( + "--lr", type=float, default=0.01, help="learning rate" + ) + argparser.add_argument( + "--n-layers", type=int, default=3, help="number of layers" + ) + argparser.add_argument( + "--n-hidden", type=int, default=120, help="number of hidden units" + ) + argparser.add_argument( + "--dropout", type=float, default=0.5, help="dropout rate" + ) + argparser.add_argument( + "--input-drop", type=float, default=0.1, help="input drop rate" + ) + argparser.add_argument( + "--attn-dropout", type=float, default=0.0, help="attention drop rate" + ) + argparser.add_argument( + "--edge-drop", type=float, default=0.1, help="edge drop rate" + ) argparser.add_argument("--wd", type=float, default=0, help="weight decay") - argparser.add_argument("--eval-every", type=int, default=2, help="log every EVAL_EVERY epochs") argparser.add_argument( - "--estimation-mode", action="store_true", help="Estimate the score of test set for speed during training." + "--eval-every", type=int, default=2, help="log every EVAL_EVERY epochs" + ) + argparser.add_argument( + "--estimation-mode", + action="store_true", + help="Estimate the score of test set for speed during training.", + ) + argparser.add_argument( + "--log-every", type=int, default=2, help="log every LOG_EVERY epochs" + ) + argparser.add_argument( + "--plot-curves", action="store_true", help="plot learning curves" ) - argparser.add_argument("--log-every", type=int, default=2, help="log every LOG_EVERY epochs") - argparser.add_argument("--plot-curves", action="store_true", help="plot learning curves") args = argparser.parse_args() if args.cpu: @@ -405,14 +574,18 @@ def main(): graph, labels, train_idx, val_idx, test_idx, evaluator = load_data(dataset) graph, labels = preprocess(graph, labels, train_idx) - labels, train_idx, val_idx, test_idx = map(lambda x: x.to(device), (labels, train_idx, val_idx, test_idx)) + labels, train_idx, val_idx, test_idx = map( + lambda x: x.to(device), (labels, train_idx, val_idx, test_idx) + ) # run val_scores, test_scores = [], [] for i in range(1, args.n_runs + 1): seed(args.seed + i) - val_score, test_score = run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, i) + val_score, test_score = run( + args, graph, labels, train_idx, val_idx, test_idx, evaluator, i + ) val_scores.append(val_score) test_scores.append(test_score) diff --git a/examples/pytorch/ogb/ogbn-products/gat/main.py b/examples/pytorch/ogb/ogbn-products/gat/main.py index abcaee4a44e0..ab4c28342941 100644 --- a/examples/pytorch/ogb/ogbn-products/gat/main.py +++ b/examples/pytorch/ogb/ogbn-products/gat/main.py @@ -1,35 +1,52 @@ -import dgl +import argparse +import time + import numpy as np import torch as th import torch.nn as nn import torch.nn.functional as F import torch.optim as optim -import dgl.nn.pytorch as dglnn -import time -import argparse import tqdm from ogb.nodeproppred import DglNodePropPredDataset +import dgl +import dgl.nn.pytorch as dglnn + class GAT(nn.Module): - def __init__(self, - in_feats, - n_hidden, - n_classes, - n_layers, - num_heads, - activation): + def __init__( + self, in_feats, n_hidden, n_classes, n_layers, num_heads, activation + ): super().__init__() self.n_layers = n_layers self.n_hidden = n_hidden self.n_classes = n_classes self.layers = nn.ModuleList() - self.layers.append(dglnn.GATConv((in_feats, in_feats), n_hidden, 
num_heads=num_heads, activation=activation)) + self.layers.append( + dglnn.GATConv( + (in_feats, in_feats), + n_hidden, + num_heads=num_heads, + activation=activation, + ) + ) for i in range(1, n_layers - 1): - self.layers.append(dglnn.GATConv((n_hidden * num_heads, n_hidden * num_heads), n_hidden, - num_heads=num_heads, activation=activation)) - self.layers.append(dglnn.GATConv((n_hidden * num_heads, n_hidden * num_heads), n_classes, - num_heads=num_heads, activation=None)) + self.layers.append( + dglnn.GATConv( + (n_hidden * num_heads, n_hidden * num_heads), + n_hidden, + num_heads=num_heads, + activation=activation, + ) + ) + self.layers.append( + dglnn.GATConv( + (n_hidden * num_heads, n_hidden * num_heads), + n_classes, + num_heads=num_heads, + activation=None, + ) + ) def forward(self, blocks, x): h = x @@ -38,7 +55,7 @@ def forward(self, blocks, x): # appropriate nodes on the LHS. # Note that the shape of h is (num_nodes_LHS, D) and the shape of h_dst # would be (num_nodes_RHS, D) - h_dst = h[:block.num_dst_nodes()] + h_dst = h[: block.num_dst_nodes()] # Then we compute the updated representation on the RHS. # The shape of h now becomes (num_nodes_RHS, D) if l < self.n_layers - 1: @@ -63,9 +80,19 @@ def inference(self, g, x, num_heads, device): # TODO: can we standardize this? for l, layer in enumerate(self.layers): if l < self.n_layers - 1: - y = th.zeros(g.num_nodes(), self.n_hidden * num_heads if l != len(self.layers) - 1 else self.n_classes) + y = th.zeros( + g.num_nodes(), + self.n_hidden * num_heads + if l != len(self.layers) - 1 + else self.n_classes, + ) else: - y = th.zeros(g.num_nodes(), self.n_hidden if l != len(self.layers) - 1 else self.n_classes) + y = th.zeros( + g.num_nodes(), + self.n_hidden + if l != len(self.layers) - 1 + else self.n_classes, + ) sampler = dgl.dataloading.MultiLayerFullNeighborSampler(1) dataloader = dgl.dataloading.DataLoader( @@ -75,15 +102,16 @@ def inference(self, g, x, num_heads, device): batch_size=args.batch_size, shuffle=True, drop_last=False, - num_workers=args.num_workers) + num_workers=args.num_workers, + ) for input_nodes, output_nodes, blocks in tqdm.tqdm(dataloader): block = blocks[0].int().to(device) h = x[input_nodes].to(device) - h_dst = h[:block.num_dst_nodes()] + h_dst = h[: block.num_dst_nodes()] if l < self.n_layers - 1: - h = layer(block, (h, h_dst)).flatten(1) + h = layer(block, (h, h_dst)).flatten(1) else: h = layer(block, (h, h_dst)) h = h.mean(1) @@ -94,12 +122,14 @@ def inference(self, g, x, num_heads, device): x = y return y.to(device) + def compute_acc(pred, labels): """ Compute the accuracy of prediction given the labels. """ return (th.argmax(pred, dim=1) == labels).float().sum() / len(pred) + def evaluate(model, g, nfeat, labels, val_nid, test_nid, num_heads, device): """ Evaluate the model on the validation set specified by ``val_mask``. 
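The inference() method above performs offline, layer-wise inference: a one-hop full-neighbour sampler visits every node in batches, each layer's outputs are written into a node-sized buffer, and that buffer becomes the input of the next layer, so GPU memory stays bounded by a single block at a time. A condensed sketch of the pattern, assuming SAGE-style layers that emit one vector per node (the GAT variant above additionally flattens or averages the attention heads):

import torch
import dgl

def layerwise_inference(g, layers, out_dims, feat, batch_size, device):
    # One full-neighbour hop per layer; every layer reads the buffer
    # written by the previous one.
    sampler = dgl.dataloading.MultiLayerFullNeighborSampler(1)
    x = feat
    for l, layer in enumerate(layers):
        y = torch.zeros(g.num_nodes(), out_dims[l])
        loader = dgl.dataloading.DataLoader(
            g,
            torch.arange(g.num_nodes()),
            sampler,
            batch_size=batch_size,
            shuffle=False,
            drop_last=False,
        )
        for input_nodes, output_nodes, blocks in loader:
            block = blocks[0].to(device)
            h = x[input_nodes].to(device)
            h_dst = h[: block.num_dst_nodes()]
            y[output_nodes] = layer(block, (h, h_dst)).cpu()
        x = y  # the finished layer feeds the next one
    return x

The extra N-by-D buffer per layer is the price paid for never holding more than one layer's neighbour expansion in memory during inference.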
@@ -114,7 +144,12 @@ def evaluate(model, g, nfeat, labels, val_nid, test_nid, num_heads, device): with th.no_grad(): pred = model.inference(g, nfeat, num_heads, device) model.train() - return compute_acc(pred[val_nid], labels[val_nid]), compute_acc(pred[test_nid], labels[test_nid]), pred + return ( + compute_acc(pred[val_nid], labels[val_nid]), + compute_acc(pred[test_nid], labels[test_nid]), + pred, + ) + def load_subtensor(nfeat, labels, seeds, input_nodes): """ @@ -124,14 +159,26 @@ def load_subtensor(nfeat, labels, seeds, input_nodes): batch_labels = labels[seeds] return batch_inputs, batch_labels + #### Entry point def run(args, device, data): # Unpack data - train_nid, val_nid, test_nid, in_feats, labels, n_classes, nfeat, g, num_heads = data + ( + train_nid, + val_nid, + test_nid, + in_feats, + labels, + n_classes, + nfeat, + g, + num_heads, + ) = data # Create PyTorch DataLoader for constructing blocks sampler = dgl.dataloading.MultiLayerNeighborSampler( - [int(fanout) for fanout in args.fan_out.split(',')]) + [int(fanout) for fanout in args.fan_out.split(",")] + ) dataloader = dgl.dataloading.DataLoader( g, train_nid, @@ -139,10 +186,13 @@ def run(args, device, data): batch_size=args.batch_size, shuffle=True, drop_last=False, - num_workers=args.num_workers) + num_workers=args.num_workers, + ) # Define model and optimizer - model = GAT(in_feats, args.num_hidden, n_classes, args.num_layers, num_heads, F.relu) + model = GAT( + in_feats, args.num_hidden, n_classes, args.num_layers, num_heads, F.relu + ) model = model.to(device) optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd) @@ -163,7 +213,9 @@ def run(args, device, data): blocks = [blk.to(device) for blk in blocks] # Load the input features as well as output labels - batch_inputs, batch_labels = load_subtensor(nfeat, labels, seeds, input_nodes) + batch_inputs, batch_labels = load_subtensor( + nfeat, labels, seeds, input_nodes + ) # Compute loss and prediction batch_pred = model(blocks, batch_inputs) @@ -175,63 +227,98 @@ def run(args, device, data): iter_tput.append(len(seeds) / (time.time() - tic_step)) if step % args.log_every == 0: acc = compute_acc(batch_pred, batch_labels) - gpu_mem_alloc = th.cuda.max_memory_allocated() / 1000000 if th.cuda.is_available() else 0 - print('Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train Acc {:.4f} | Speed (samples/sec) {:.4f} | GPU {:.1f} MB'.format( - epoch, step, loss.item(), acc.item(), np.mean(iter_tput[3:]), gpu_mem_alloc)) + gpu_mem_alloc = ( + th.cuda.max_memory_allocated() / 1000000 + if th.cuda.is_available() + else 0 + ) + print( + "Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train Acc {:.4f} | Speed (samples/sec) {:.4f} | GPU {:.1f} MB".format( + epoch, + step, + loss.item(), + acc.item(), + np.mean(iter_tput[3:]), + gpu_mem_alloc, + ) + ) toc = time.time() - print('Epoch Time(s): {:.4f}'.format(toc - tic)) + print("Epoch Time(s): {:.4f}".format(toc - tic)) if epoch >= 5: avg += toc - tic if epoch % args.eval_every == 0 and epoch != 0: - eval_acc, test_acc, pred = evaluate(model, g, nfeat, labels, val_nid, test_nid, num_heads, device) + eval_acc, test_acc, pred = evaluate( + model, g, nfeat, labels, val_nid, test_nid, num_heads, device + ) if args.save_pred: - np.savetxt(args.save_pred + '%02d' % epoch, pred.argmax(1).cpu().numpy(), '%d') - print('Eval Acc {:.4f}'.format(eval_acc)) + np.savetxt( + args.save_pred + "%02d" % epoch, + pred.argmax(1).cpu().numpy(), + "%d", + ) + print("Eval Acc {:.4f}".format(eval_acc)) if eval_acc > best_eval_acc: 
best_eval_acc = eval_acc best_test_acc = test_acc - print('Best Eval Acc {:.4f} Test Acc {:.4f}'.format(best_eval_acc, best_test_acc)) + print( + "Best Eval Acc {:.4f} Test Acc {:.4f}".format( + best_eval_acc, best_test_acc + ) + ) - print('Avg epoch time: {}'.format(avg / (epoch - 4))) + print("Avg epoch time: {}".format(avg / (epoch - 4))) return best_test_acc -if __name__ == '__main__': + +if __name__ == "__main__": argparser = argparse.ArgumentParser("multi-gpu training") - argparser.add_argument('--gpu', type=int, default=0, - help="GPU device ID. Use -1 for CPU training") - argparser.add_argument('--num-epochs', type=int, default=100) - argparser.add_argument('--num-hidden', type=int, default=128) - argparser.add_argument('--num-layers', type=int, default=3) - argparser.add_argument('--fan-out', type=str, default='10,10,10') - argparser.add_argument('--batch-size', type=int, default=512) - argparser.add_argument('--val-batch-size', type=int, default=512) - argparser.add_argument('--log-every', type=int, default=20) - argparser.add_argument('--eval-every', type=int, default=1) - argparser.add_argument('--lr', type=float, default=0.001) - argparser.add_argument('--num-workers', type=int, default=8, - help="Number of sampling processes. Use 0 for no extra process.") - argparser.add_argument('--save-pred', type=str, default='') - argparser.add_argument('--head', type=int, default=4) - argparser.add_argument('--wd', type=float, default=0) + argparser.add_argument( + "--gpu", + type=int, + default=0, + help="GPU device ID. Use -1 for CPU training", + ) + argparser.add_argument("--num-epochs", type=int, default=100) + argparser.add_argument("--num-hidden", type=int, default=128) + argparser.add_argument("--num-layers", type=int, default=3) + argparser.add_argument("--fan-out", type=str, default="10,10,10") + argparser.add_argument("--batch-size", type=int, default=512) + argparser.add_argument("--val-batch-size", type=int, default=512) + argparser.add_argument("--log-every", type=int, default=20) + argparser.add_argument("--eval-every", type=int, default=1) + argparser.add_argument("--lr", type=float, default=0.001) + argparser.add_argument( + "--num-workers", + type=int, + default=8, + help="Number of sampling processes. 
Use 0 for no extra process.", + ) + argparser.add_argument("--save-pred", type=str, default="") + argparser.add_argument("--head", type=int, default=4) + argparser.add_argument("--wd", type=float, default=0) args = argparser.parse_args() - + if args.gpu >= 0: - device = th.device('cuda:%d' % args.gpu) + device = th.device("cuda:%d" % args.gpu) else: - device = th.device('cpu') + device = th.device("cpu") # load data - data = DglNodePropPredDataset(name='ogbn-products') + data = DglNodePropPredDataset(name="ogbn-products") splitted_idx = data.get_idx_split() - train_idx, val_idx, test_idx = splitted_idx['train'], splitted_idx['valid'], splitted_idx['test'] + train_idx, val_idx, test_idx = ( + splitted_idx["train"], + splitted_idx["valid"], + splitted_idx["test"], + ) graph, labels = data[0] - nfeat = graph.ndata.pop('feat').to(device) + nfeat = graph.ndata.pop("feat").to(device) labels = labels[:, 0].to(device) - print('Total edges before adding self-loop {}'.format(graph.num_edges())) + print("Total edges before adding self-loop {}".format(graph.num_edges())) graph = graph.remove_self_loop().add_self_loop() - print('Total edges after adding self-loop {}'.format(graph.num_edges())) + print("Total edges after adding self-loop {}".format(graph.num_edges())) in_feats = nfeat.shape[1] n_classes = (labels.max() + 1).item() @@ -240,10 +327,22 @@ def run(args, device, data): # This avoids creating certain formats in each data loader process, which saves momory and CPU. graph.create_formats_() # Pack data - data = train_idx, val_idx, test_idx, in_feats, labels, n_classes, nfeat, graph, args.head + data = ( + train_idx, + val_idx, + test_idx, + in_feats, + labels, + n_classes, + nfeat, + graph, + args.head, + ) # Run 10 times test_accs = [] for i in range(10): test_accs.append(run(args, device, data).cpu().numpy()) - print('Average test accuracy:', np.mean(test_accs), '±', np.std(test_accs)) + print( + "Average test accuracy:", np.mean(test_accs), "±", np.std(test_accs) + ) diff --git a/examples/pytorch/ogb/ogbn-products/gat/models.py b/examples/pytorch/ogb/ogbn-products/gat/models.py index 81751e1fac0d..3dee4fdac0b7 100644 --- a/examples/pytorch/ogb/ogbn-products/gat/models.py +++ b/examples/pytorch/ogb/ogbn-products/gat/models.py @@ -2,6 +2,7 @@ import torch import torch.nn as nn import torch.nn.functional as F + from dgl import function as fn from dgl.ops import edge_softmax from dgl.utils import expand_as_pair @@ -31,7 +32,9 @@ def __init__( self._use_symmetric_norm = use_symmetric_norm # feat fc - self.src_fc = nn.Linear(self._in_src_feats, out_feats * n_heads, bias=False) + self.src_fc = nn.Linear( + self._in_src_feats, out_feats * n_heads, bias=False + ) if residual: self.dst_fc = nn.Linear(self._in_src_feats, out_feats * n_heads) self.bias = None @@ -42,7 +45,9 @@ def __init__( # attn fc self.attn_src_fc = nn.Linear(self._in_src_feats, n_heads, bias=False) if use_attn_dst: - self.attn_dst_fc = nn.Linear(self._in_src_feats, n_heads, bias=False) + self.attn_dst_fc = nn.Linear( + self._in_src_feats, n_heads, bias=False + ) else: self.attn_dst_fc = None if edge_feats > 0: @@ -93,8 +98,12 @@ def forward(self, graph, feat_src, feat_edge=None): norm = torch.reshape(norm, shp) feat_src = feat_src * norm - feat_src_fc = self.src_fc(feat_src).view(-1, self._n_heads, self._out_feats) - feat_dst_fc = self.dst_fc(feat_dst).view(-1, self._n_heads, self._out_feats) + feat_src_fc = self.src_fc(feat_src).view( + -1, self._n_heads, self._out_feats + ) + feat_dst_fc = self.dst_fc(feat_dst).view( + -1, 
self._n_heads, self._out_feats + ) attn_src = self.attn_src_fc(feat_src).view(-1, self._n_heads, 1) # NOTE: GAT paper uses "first concatenation then linear projection" @@ -107,18 +116,24 @@ def forward(self, graph, feat_src, feat_edge=None): # save [Wh_i || Wh_j] on edges, which is not memory-efficient. Plus, # addition could be optimized with DGL's built-in function u_add_v, # which further speeds up computation and saves memory footprint. - graph.srcdata.update({"feat_src_fc": feat_src_fc, "attn_src": attn_src}) + graph.srcdata.update( + {"feat_src_fc": feat_src_fc, "attn_src": attn_src} + ) if self.attn_dst_fc is not None: attn_dst = self.attn_dst_fc(feat_dst).view(-1, self._n_heads, 1) graph.dstdata.update({"attn_dst": attn_dst}) - graph.apply_edges(fn.u_add_v("attn_src", "attn_dst", "attn_node")) + graph.apply_edges( + fn.u_add_v("attn_src", "attn_dst", "attn_node") + ) else: graph.apply_edges(fn.copy_u("attn_src", "attn_node")) e = graph.edata["attn_node"] if feat_edge is not None: - attn_edge = self.attn_edge_fc(feat_edge).view(-1, self._n_heads, 1) + attn_edge = self.attn_edge_fc(feat_edge).view( + -1, self._n_heads, 1 + ) graph.edata.update({"attn_edge": attn_edge}) e += graph.edata["attn_edge"] e = self.leaky_relu(e) @@ -128,12 +143,16 @@ def forward(self, graph, feat_src, feat_edge=None): bound = int(graph.number_of_edges() * self.edge_drop) eids = perm[bound:] graph.edata["a"] = torch.zeros_like(e) - graph.edata["a"][eids] = self.attn_drop(edge_softmax(graph, e[eids], eids=eids)) + graph.edata["a"][eids] = self.attn_drop( + edge_softmax(graph, e[eids], eids=eids) + ) else: graph.edata["a"] = self.attn_drop(edge_softmax(graph, e)) # message passing - graph.update_all(fn.u_mul_e("feat_src_fc", "a", "m"), fn.sum("m", "feat_src_fc")) + graph.update_all( + fn.u_mul_e("feat_src_fc", "a", "m"), fn.sum("m", "feat_src_fc") + ) rst = graph.dstdata["feat_src_fc"] if self._use_symmetric_norm: @@ -257,7 +276,15 @@ def forward(self, g, inference=False): class MLP(nn.Module): def __init__( - self, in_feats, n_classes, n_layers, n_hidden, activation, dropout=0.0, input_drop=0.0, residual=False, + self, + in_feats, + n_classes, + n_layers, + n_hidden, + activation, + dropout=0.0, + input_drop=0.0, + residual=False, ): super().__init__() self.n_layers = n_layers diff --git a/examples/pytorch/ogb/ogbn-products/graphsage/main.py b/examples/pytorch/ogb/ogbn-products/graphsage/main.py index afe7a5850685..4e36512ab404 100644 --- a/examples/pytorch/ogb/ogbn-products/graphsage/main.py +++ b/examples/pytorch/ogb/ogbn-products/graphsage/main.py @@ -1,32 +1,31 @@ -import dgl +import argparse +import time + import numpy as np import torch as th import torch.nn as nn import torch.nn.functional as F import torch.optim as optim -import dgl.nn.pytorch as dglnn -import time -import argparse import tqdm from ogb.nodeproppred import DglNodePropPredDataset +import dgl +import dgl.nn.pytorch as dglnn + + class SAGE(nn.Module): - def __init__(self, - in_feats, - n_hidden, - n_classes, - n_layers, - activation, - dropout): + def __init__( + self, in_feats, n_hidden, n_classes, n_layers, activation, dropout + ): super().__init__() self.n_layers = n_layers self.n_hidden = n_hidden self.n_classes = n_classes self.layers = nn.ModuleList() - self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, 'mean')) + self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, "mean")) for i in range(1, n_layers - 1): - self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, 'mean')) - self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, 
'mean')) + self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, "mean")) + self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, "mean")) self.dropout = nn.Dropout(dropout) self.activation = activation @@ -37,7 +36,7 @@ def forward(self, blocks, x): # appropriate nodes on the LHS. # Note that the shape of h is (num_nodes_LHS, D) and the shape of h_dst # would be (num_nodes_RHS, D) - h_dst = h[:block.num_dst_nodes()] + h_dst = h[: block.num_dst_nodes()] # Then we compute the updated representation on the RHS. # The shape of h now becomes (num_nodes_RHS, D) h = layer(block, (h, h_dst)) @@ -60,7 +59,10 @@ def inference(self, g, x, device): # on each layer are of course splitted in batches. # TODO: can we standardize this? for l, layer in enumerate(self.layers): - y = th.zeros(g.num_nodes(), self.n_hidden if l != len(self.layers) - 1 else self.n_classes).to(device) + y = th.zeros( + g.num_nodes(), + self.n_hidden if l != len(self.layers) - 1 else self.n_classes, + ).to(device) sampler = dgl.dataloading.MultiLayerFullNeighborSampler(1) dataloader = dgl.dataloading.DataLoader( @@ -70,13 +72,14 @@ def inference(self, g, x, device): batch_size=args.batch_size, shuffle=True, drop_last=False, - num_workers=args.num_workers) + num_workers=args.num_workers, + ) for input_nodes, output_nodes, blocks in tqdm.tqdm(dataloader): block = blocks[0].int().to(device) h = x[input_nodes] - h_dst = h[:block.num_dst_nodes()] + h_dst = h[: block.num_dst_nodes()] h = layer(block, (h, h_dst)) if l != len(self.layers) - 1: h = self.activation(h) @@ -87,12 +90,14 @@ def inference(self, g, x, device): x = y return y + def compute_acc(pred, labels): """ Compute the accuracy of prediction given the labels. """ return (th.argmax(pred, dim=1) == labels).float().sum() / len(pred) + def evaluate(model, g, nfeat, labels, val_nid, test_nid, device): """ Evaluate the model on the validation set specified by ``val_mask``. 
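The repeated `h_dst = h[: block.num_dst_nodes()]` slice relies on DGL's convention that a block's destination nodes appear as the leading rows of its source nodes, so the (input, destination) feature pair for each layer can be built with a plain prefix slice. A small self-contained illustration of that convention, using a toy graph and hypothetical sizes:

import torch
import dgl

# Toy 4-node graph with a few directed edges and random features.
g = dgl.graph((torch.tensor([0, 1, 2, 3]), torch.tensor([1, 2, 3, 0])))
g.ndata["feat"] = torch.randn(4, 8)

sampler = dgl.dataloading.MultiLayerFullNeighborSampler(1)
loader = dgl.dataloading.DataLoader(
    g, torch.tensor([1, 2]), sampler, batch_size=2, shuffle=False
)
input_nodes, output_nodes, blocks = next(iter(loader))
block = blocks[0]

h = g.ndata["feat"][input_nodes]        # (num_src_nodes, 8)
h_dst = h[: block.num_dst_nodes()]      # (num_dst_nodes, 8)
# The destination nodes are the leading rows of the source nodes,
# so this slice is exactly the feature matrix of the seed nodes.
assert torch.equal(input_nodes[: block.num_dst_nodes()], output_nodes)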
@@ -106,7 +111,12 @@ def evaluate(model, g, nfeat, labels, val_nid, test_nid, device): with th.no_grad(): pred = model.inference(g, nfeat, device) model.train() - return compute_acc(pred[val_nid], labels[val_nid]), compute_acc(pred[test_nid], labels[test_nid]), pred + return ( + compute_acc(pred[val_nid], labels[val_nid]), + compute_acc(pred[test_nid], labels[test_nid]), + pred, + ) + def load_subtensor(nfeat, labels, seeds, input_nodes): """ @@ -116,6 +126,7 @@ def load_subtensor(nfeat, labels, seeds, input_nodes): batch_labels = labels[seeds] return batch_inputs, batch_labels + #### Entry point def run(args, device, data): # Unpack data @@ -123,7 +134,8 @@ def run(args, device, data): # Create PyTorch DataLoader for constructing blocks sampler = dgl.dataloading.MultiLayerNeighborSampler( - [int(fanout) for fanout in args.fan_out.split(',')]) + [int(fanout) for fanout in args.fan_out.split(",")] + ) dataloader = dgl.dataloading.DataLoader( g, train_nid, @@ -131,10 +143,18 @@ def run(args, device, data): batch_size=args.batch_size, shuffle=True, drop_last=False, - num_workers=args.num_workers) + num_workers=args.num_workers, + ) # Define model and optimizer - model = SAGE(in_feats, args.num_hidden, n_classes, args.num_layers, F.relu, args.dropout) + model = SAGE( + in_feats, + args.num_hidden, + n_classes, + args.num_layers, + F.relu, + args.dropout, + ) model = model.to(device) loss_fcn = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd) @@ -156,7 +176,9 @@ def run(args, device, data): blocks = [blk.int().to(device) for blk in blocks] # Load the input features as well as output labels - batch_inputs, batch_labels = load_subtensor(nfeat, labels, seeds, input_nodes) + batch_inputs, batch_labels = load_subtensor( + nfeat, labels, seeds, input_nodes + ) # Compute loss and prediction batch_pred = model(blocks, batch_inputs) @@ -168,58 +190,93 @@ def run(args, device, data): iter_tput.append(len(seeds) / (time.time() - tic_step)) if step % args.log_every == 0: acc = compute_acc(batch_pred, batch_labels) - gpu_mem_alloc = th.cuda.max_memory_allocated() / 1000000 if th.cuda.is_available() else 0 - print('Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train Acc {:.4f} | Speed (samples/sec) {:.4f} | GPU {:.1f} MB'.format( - epoch, step, loss.item(), acc.item(), np.mean(iter_tput[3:]), gpu_mem_alloc)) + gpu_mem_alloc = ( + th.cuda.max_memory_allocated() / 1000000 + if th.cuda.is_available() + else 0 + ) + print( + "Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train Acc {:.4f} | Speed (samples/sec) {:.4f} | GPU {:.1f} MB".format( + epoch, + step, + loss.item(), + acc.item(), + np.mean(iter_tput[3:]), + gpu_mem_alloc, + ) + ) toc = time.time() - print('Epoch Time(s): {:.4f}'.format(toc - tic)) + print("Epoch Time(s): {:.4f}".format(toc - tic)) if epoch >= 5: avg += toc - tic if epoch % args.eval_every == 0 and epoch != 0: - eval_acc, test_acc, pred = evaluate(model, g, nfeat, labels, val_nid, test_nid, device) + eval_acc, test_acc, pred = evaluate( + model, g, nfeat, labels, val_nid, test_nid, device + ) if args.save_pred: - np.savetxt(args.save_pred + '%02d' % epoch, pred.argmax(1).cpu().numpy(), '%d') - print('Eval Acc {:.4f}'.format(eval_acc)) + np.savetxt( + args.save_pred + "%02d" % epoch, + pred.argmax(1).cpu().numpy(), + "%d", + ) + print("Eval Acc {:.4f}".format(eval_acc)) if eval_acc > best_eval_acc: best_eval_acc = eval_acc best_test_acc = test_acc - print('Best Eval Acc {:.4f} Test Acc {:.4f}'.format(best_eval_acc, best_test_acc)) + print( + "Best 
Eval Acc {:.4f} Test Acc {:.4f}".format( + best_eval_acc, best_test_acc + ) + ) - print('Avg epoch time: {}'.format(avg / (epoch - 4))) + print("Avg epoch time: {}".format(avg / (epoch - 4))) return best_test_acc -if __name__ == '__main__': + +if __name__ == "__main__": argparser = argparse.ArgumentParser("multi-gpu training") - argparser.add_argument('--gpu', type=int, default=0, - help="GPU device ID. Use -1 for CPU training") - argparser.add_argument('--num-epochs', type=int, default=20) - argparser.add_argument('--num-hidden', type=int, default=256) - argparser.add_argument('--num-layers', type=int, default=3) - argparser.add_argument('--fan-out', type=str, default='5,10,15') - argparser.add_argument('--batch-size', type=int, default=1000) - argparser.add_argument('--val-batch-size', type=int, default=10000) - argparser.add_argument('--log-every', type=int, default=20) - argparser.add_argument('--eval-every', type=int, default=1) - argparser.add_argument('--lr', type=float, default=0.003) - argparser.add_argument('--dropout', type=float, default=0.5) - argparser.add_argument('--num-workers', type=int, default=4, - help="Number of sampling processes. Use 0 for no extra process.") - argparser.add_argument('--save-pred', type=str, default='') - argparser.add_argument('--wd', type=float, default=0) + argparser.add_argument( + "--gpu", + type=int, + default=0, + help="GPU device ID. Use -1 for CPU training", + ) + argparser.add_argument("--num-epochs", type=int, default=20) + argparser.add_argument("--num-hidden", type=int, default=256) + argparser.add_argument("--num-layers", type=int, default=3) + argparser.add_argument("--fan-out", type=str, default="5,10,15") + argparser.add_argument("--batch-size", type=int, default=1000) + argparser.add_argument("--val-batch-size", type=int, default=10000) + argparser.add_argument("--log-every", type=int, default=20) + argparser.add_argument("--eval-every", type=int, default=1) + argparser.add_argument("--lr", type=float, default=0.003) + argparser.add_argument("--dropout", type=float, default=0.5) + argparser.add_argument( + "--num-workers", + type=int, + default=4, + help="Number of sampling processes. Use 0 for no extra process.", + ) + argparser.add_argument("--save-pred", type=str, default="") + argparser.add_argument("--wd", type=float, default=0) args = argparser.parse_args() - + if args.gpu >= 0: - device = th.device('cuda:%d' % args.gpu) + device = th.device("cuda:%d" % args.gpu) else: - device = th.device('cpu') + device = th.device("cpu") # load ogbn-products data - data = DglNodePropPredDataset(name='ogbn-products') + data = DglNodePropPredDataset(name="ogbn-products") splitted_idx = data.get_idx_split() - train_idx, val_idx, test_idx = splitted_idx['train'], splitted_idx['valid'], splitted_idx['test'] + train_idx, val_idx, test_idx = ( + splitted_idx["train"], + splitted_idx["valid"], + splitted_idx["test"], + ) graph, labels = data[0] - nfeat = graph.ndata.pop('feat').to(device) + nfeat = graph.ndata.pop("feat").to(device) labels = labels[:, 0].to(device) in_feats = nfeat.shape[1] @@ -228,10 +285,21 @@ def run(args, device, data): # This avoids creating certain formats in each data loader process, which saves momory and CPU. 
graph.create_formats_() # Pack data - data = train_idx, val_idx, test_idx, in_feats, labels, n_classes, nfeat, graph + data = ( + train_idx, + val_idx, + test_idx, + in_feats, + labels, + n_classes, + nfeat, + graph, + ) # Run 10 times test_accs = [] for i in range(10): test_accs.append(run(args, device, data).cpu().numpy()) - print('Average test accuracy:', np.mean(test_accs), '±', np.std(test_accs)) + print( + "Average test accuracy:", np.mean(test_accs), "±", np.std(test_accs) + ) diff --git a/examples/pytorch/ogb/ogbn-products/mlp/mlp.py b/examples/pytorch/ogb/ogbn-products/mlp/mlp.py index caefc73e943b..ec06937f569c 100755 --- a/examples/pytorch/ogb/ogbn-products/mlp/mlp.py +++ b/examples/pytorch/ogb/ogbn-products/mlp/mlp.py @@ -7,20 +7,23 @@ import time from collections import OrderedDict -import dgl.function as fn import matplotlib.pyplot as plt import numpy as np import torch import torch.nn.functional as F import torch.optim as optim -from dgl.dataloading import MultiLayerFullNeighborSampler, MultiLayerNeighborSampler -from dgl.dataloading import DataLoader from matplotlib.ticker import AutoMinorLocator, MultipleLocator +from models import MLP from ogb.nodeproppred import DglNodePropPredDataset, Evaluator from torch import nn from tqdm import tqdm -from models import MLP +import dgl.function as fn +from dgl.dataloading import ( + DataLoader, + MultiLayerFullNeighborSampler, + MultiLayerNeighborSampler, +) epsilon = 1 - math.log(2) @@ -44,7 +47,11 @@ def load_data(dataset): evaluator = Evaluator(name=dataset) splitted_idx = data.get_idx_split() - train_idx, val_idx, test_idx = splitted_idx["train"], splitted_idx["valid"], splitted_idx["test"] + train_idx, val_idx, test_idx = ( + splitted_idx["train"], + splitted_idx["valid"], + splitted_idx["test"], + ) graph, labels = data[0] graph.ndata["labels"] = labels @@ -83,7 +90,9 @@ def custom_loss_function(x, labels): return torch.mean(y) -def train(args, model, dataloader, labels, train_idx, criterion, optimizer, evaluator): +def train( + args, model, dataloader, labels, train_idx, criterion, optimizer, evaluator +): model.train() loss_sum, total = 0, 0 @@ -97,7 +106,9 @@ def train(args, model, dataloader, labels, train_idx, criterion, optimizer, eval pred = model(subgraphs[0].srcdata["feat"]) preds[output_nodes] = pred.cpu().detach() - loss = criterion(pred[new_train_idx], labels[output_nodes][new_train_idx]) + loss = criterion( + pred[new_train_idx], labels[output_nodes][new_train_idx] + ) optimizer.zero_grad() loss.backward() optimizer.step() @@ -114,7 +125,17 @@ def train(args, model, dataloader, labels, train_idx, criterion, optimizer, eval @torch.no_grad() -def evaluate(args, model, dataloader, labels, train_idx, val_idx, test_idx, criterion, evaluator): +def evaluate( + args, + model, + dataloader, + labels, + train_idx, + val_idx, + test_idx, + criterion, + evaluator, +): model.eval() preds = torch.zeros(labels.shape[0], n_classes, device=device) @@ -144,43 +165,56 @@ def evaluate(args, model, dataloader, labels, train_idx, val_idx, test_idx, crit ) -def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running): +def run( + args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running +): evaluator_wrapper = lambda pred, labels: evaluator.eval( {"y_pred": pred.argmax(dim=-1, keepdim=True), "y_true": labels} )["acc"] criterion = custom_loss_function train_batch_size = 4096 - train_sampler = MultiLayerNeighborSampler([0 for _ in range(args.n_layers)]) # no not sample neighbors + train_sampler = 
MultiLayerNeighborSampler( + [0 for _ in range(args.n_layers)] + ) # no not sample neighbors train_dataloader = DataLoader( - graph.cpu(), - train_idx.cpu(), - train_sampler, - batch_size=train_batch_size, - shuffle=True, - num_workers=4 + graph.cpu(), + train_idx.cpu(), + train_sampler, + batch_size=train_batch_size, + shuffle=True, + num_workers=4, ) eval_batch_size = 4096 - eval_sampler = MultiLayerNeighborSampler([0 for _ in range(args.n_layers)]) # no not sample neighbors + eval_sampler = MultiLayerNeighborSampler( + [0 for _ in range(args.n_layers)] + ) # no not sample neighbors if args.eval_last: eval_idx = torch.cat([train_idx.cpu(), val_idx.cpu()]) else: eval_idx = torch.cat([train_idx.cpu(), val_idx.cpu(), test_idx.cpu()]) eval_dataloader = DataLoader( - graph.cpu(), - eval_idx, - eval_sampler, - batch_size=eval_batch_size, - shuffle=False, - num_workers=4 + graph.cpu(), + eval_idx, + eval_sampler, + batch_size=eval_batch_size, + shuffle=False, + num_workers=4, ) model = gen_model(args).to(device) - optimizer = optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.wd) + optimizer = optim.AdamW( + model.parameters(), lr=args.lr, weight_decay=args.wd + ) lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau( - optimizer, mode="max", factor=0.7, patience=20, verbose=True, min_lr=1e-4 + optimizer, + mode="max", + factor=0.7, + patience=20, + verbose=True, + min_lr=1e-4, ) best_model_state_dict = None @@ -193,21 +227,47 @@ def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running) for epoch in range(1, args.n_epochs + 1): tic = time.time() - loss, score = train(args, model, train_dataloader, labels, train_idx, criterion, optimizer, evaluator_wrapper) + loss, score = train( + args, + model, + train_dataloader, + labels, + train_idx, + criterion, + optimizer, + evaluator_wrapper, + ) toc = time.time() total_time += toc - tic if epoch % args.eval_every == 0 or epoch % args.log_every == 0: - train_score, val_score, test_score, train_loss, val_loss, test_loss = evaluate( - args, model, eval_dataloader, labels, train_idx, val_idx, test_idx, criterion, evaluator_wrapper + ( + train_score, + val_score, + test_score, + train_loss, + val_loss, + test_loss, + ) = evaluate( + args, + model, + eval_dataloader, + labels, + train_idx, + val_idx, + test_idx, + criterion, + evaluator_wrapper, ) if val_score > best_val_score: best_val_score = val_score final_test_score = test_score if args.eval_last: - best_model_state_dict = {k: v.to("cpu") for k, v in model.state_dict().items()} + best_model_state_dict = { + k: v.to("cpu") for k, v in model.state_dict().items() + } best_model_state_dict = OrderedDict(best_model_state_dict) if epoch % args.log_every == 0: @@ -221,8 +281,26 @@ def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running) ) for l, e in zip( - [scores, train_scores, val_scores, test_scores, losses, train_losses, val_losses, test_losses], - [score, train_score, val_score, test_score, loss, train_loss, val_loss, test_loss], + [ + scores, + train_scores, + val_scores, + test_scores, + losses, + train_losses, + val_losses, + test_losses, + ], + [ + score, + train_score, + val_score, + test_score, + loss, + train_loss, + val_loss, + test_loss, + ], ): l.append(e) @@ -231,19 +309,29 @@ def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running) if args.eval_last: model.load_state_dict(best_model_state_dict) eval_dataloader = DataLoader( - graph.cpu(), - test_idx.cpu(), - eval_sampler, - batch_size=eval_batch_size, - 
shuffle=False, - num_workers=4 + graph.cpu(), + test_idx.cpu(), + eval_sampler, + batch_size=eval_batch_size, + shuffle=False, + num_workers=4, ) final_test_score = evaluate( - args, model, eval_dataloader, labels, train_idx, val_idx, test_idx, criterion, evaluator_wrapper + args, + model, + eval_dataloader, + labels, + train_idx, + val_idx, + test_idx, + criterion, + evaluator_wrapper, )[2] print("*" * 50) - print(f"Average epoch time: {total_time / args.n_epochs}, Test score: {final_test_score}") + print( + f"Average epoch time: {total_time / args.n_epochs}, Test score: {final_test_score}" + ) if args.plot_curves: fig = plt.figure(figsize=(24, 24)) @@ -251,8 +339,16 @@ def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running) ax.set_xticks(np.arange(0, args.n_epochs, 100)) ax.set_yticks(np.linspace(0, 1.0, 101)) ax.tick_params(labeltop=True, labelright=True) - for y, label in zip([train_scores, val_scores, test_scores], ["train score", "val score", "test score"]): - plt.plot(range(1, args.n_epochs + 1, args.log_every), y, label=label, linewidth=1) + for y, label in zip( + [train_scores, val_scores, test_scores], + ["train score", "val score", "test score"], + ): + plt.plot( + range(1, args.n_epochs + 1, args.log_every), + y, + label=label, + linewidth=1, + ) ax.xaxis.set_major_locator(MultipleLocator(20)) ax.xaxis.set_minor_locator(AutoMinorLocator(1)) ax.yaxis.set_major_locator(MultipleLocator(0.01)) @@ -268,9 +364,15 @@ def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running) ax.set_xticks(np.arange(0, args.n_epochs, 100)) ax.tick_params(labeltop=True, labelright=True) for y, label in zip( - [losses, train_losses, val_losses, test_losses], ["loss", "train loss", "val loss", "test loss"] + [losses, train_losses, val_losses, test_losses], + ["loss", "train loss", "val loss", "test loss"], ): - plt.plot(range(1, args.n_epochs + 1, args.log_every), y, label=label, linewidth=1) + plt.plot( + range(1, args.n_epochs + 1, args.log_every), + y, + label=label, + linewidth=1, + ) ax.xaxis.set_major_locator(MultipleLocator(20)) ax.xaxis.set_minor_locator(AutoMinorLocator(1)) ax.yaxis.set_major_locator(MultipleLocator(0.1)) @@ -286,14 +388,23 @@ def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running) def count_parameters(args): model = gen_model(args) - return sum([np.prod(p.size()) for p in model.parameters() if p.requires_grad]) + return sum( + [np.prod(p.size()) for p in model.parameters() if p.requires_grad] + ) def main(): global device - argparser = argparse.ArgumentParser("GAT on OGBN-Proteins", formatter_class=argparse.ArgumentDefaultsHelpFormatter) - argparser.add_argument("--cpu", action="store_true", help="CPU mode. This option overrides '--gpu'.") + argparser = argparse.ArgumentParser( + "GAT on OGBN-Proteins", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + argparser.add_argument( + "--cpu", + action="store_true", + help="CPU mode. 
This option overrides '--gpu'.", + ) argparser.add_argument("--gpu", type=int, default=0, help="GPU device ID.") argparser.add_argument("--seed", type=int, help="seed", default=0) argparser.add_argument("--n-runs", type=int, default=10) @@ -304,8 +415,16 @@ def main(): argparser.add_argument("--dropout", type=float, default=0.2) argparser.add_argument("--input-drop", type=float, default=0) argparser.add_argument("--wd", type=float, default=0) - argparser.add_argument("--estimation-mode", action="store_true", help="Estimate the score of test set for speed.") - argparser.add_argument("--eval-last", action="store_true", help="Evaluate the score of test set at last.") + argparser.add_argument( + "--estimation-mode", + action="store_true", + help="Estimate the score of test set for speed.", + ) + argparser.add_argument( + "--eval-last", + action="store_true", + help="Evaluate the score of test set at last.", + ) argparser.add_argument("--eval-every", type=int, default=1) argparser.add_argument("--log-every", type=int, default=1) argparser.add_argument("--plot-curves", action="store_true") @@ -317,7 +436,9 @@ def main(): device = torch.device("cuda:%d" % args.gpu) if args.estimation_mode: - print("WARNING: Estimation mode is enabled. The test score is not accurate.") + print( + "WARNING: Estimation mode is enabled. The test score is not accurate." + ) seed(args.seed) @@ -336,7 +457,9 @@ def main(): val_scores, test_scores = [], [] for i in range(1, args.n_runs + 1): - val_score, test_score = run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, i) + val_score, test_score = run( + args, graph, labels, train_idx, val_idx, test_idx, evaluator, i + ) val_scores.append(val_score) test_scores.append(test_score) @@ -349,7 +472,9 @@ def main(): print(f"Number of params: {count_parameters(args)}") if args.estimation_mode: - print("WARNING: Estimation mode is enabled. The test score is not accurate.") + print( + "WARNING: Estimation mode is enabled. The test score is not accurate." 
+ ) if __name__ == "__main__": diff --git a/examples/pytorch/ogb/ogbn-products/mlp/models.py b/examples/pytorch/ogb/ogbn-products/mlp/models.py index 979e2ea99a5d..c64133862986 100644 --- a/examples/pytorch/ogb/ogbn-products/mlp/models.py +++ b/examples/pytorch/ogb/ogbn-products/mlp/models.py @@ -5,7 +5,15 @@ class MLP(nn.Module): def __init__( - self, in_feats, n_classes, n_layers, n_hidden, activation, dropout=0.0, input_drop=0.0, residual=False, + self, + in_feats, + n_classes, + n_layers, + n_hidden, + activation, + dropout=0.0, + input_drop=0.0, + residual=False, ): super().__init__() self.n_layers = n_layers diff --git a/examples/pytorch/ogb/ogbn-proteins/configure.py b/examples/pytorch/ogb/ogbn-proteins/configure.py index 540a94f79ed4..579ae8c7b407 100644 --- a/examples/pytorch/ogb/ogbn-proteins/configure.py +++ b/examples/pytorch/ogb/ogbn-proteins/configure.py @@ -2,39 +2,39 @@ import torch MWE_GCN_proteins = { - 'num_ew_channels': 8, - 'num_epochs': 2000, - 'in_feats': 1, - 'hidden_feats': 10, - 'out_feats': 112, - 'n_layers': 3, - 'lr': 2e-2, - 'weight_decay': 0, - 'patience': 1000, - 'dropout': 0.2, - 'aggr_mode': 'sum', ## 'sum' or 'concat' for the aggregation across channels - 'ewnorm': 'both' - } + "num_ew_channels": 8, + "num_epochs": 2000, + "in_feats": 1, + "hidden_feats": 10, + "out_feats": 112, + "n_layers": 3, + "lr": 2e-2, + "weight_decay": 0, + "patience": 1000, + "dropout": 0.2, + "aggr_mode": "sum", ## 'sum' or 'concat' for the aggregation across channels + "ewnorm": "both", +} MWE_DGCN_proteins = { - 'num_ew_channels': 8, - 'num_epochs': 2000, - 'in_feats': 1, - 'hidden_feats': 10, - 'out_feats': 112, - 'n_layers': 2, - 'lr': 1e-2, - 'weight_decay': 0, - 'patience': 300, - 'dropout': 0.5, - 'aggr_mode': 'sum', - 'residual': True, - 'ewnorm': 'none' - } + "num_ew_channels": 8, + "num_epochs": 2000, + "in_feats": 1, + "hidden_feats": 10, + "out_feats": 112, + "n_layers": 2, + "lr": 1e-2, + "weight_decay": 0, + "patience": 300, + "dropout": 0.5, + "aggr_mode": "sum", + "residual": True, + "ewnorm": "none", +} def get_exp_configure(args): - if (args['model'] == 'MWE-GCN'): + if args["model"] == "MWE-GCN": return MWE_GCN_proteins - elif (args['model'] == 'MWE-DGCN'): + elif args["model"] == "MWE-DGCN": return MWE_DGCN_proteins diff --git a/examples/pytorch/ogb/ogbn-proteins/gat.py b/examples/pytorch/ogb/ogbn-proteins/gat.py index 393c2c4f7754..59e561280a4c 100755 --- a/examples/pytorch/ogb/ogbn-proteins/gat.py +++ b/examples/pytorch/ogb/ogbn-proteins/gat.py @@ -7,20 +7,23 @@ import sys import time -import dgl -import dgl.function as fn import matplotlib.pyplot as plt import numpy as np import torch import torch.nn.functional as F import torch.optim as optim -from dgl.dataloading import MultiLayerFullNeighborSampler, MultiLayerNeighborSampler -from dgl.dataloading import DataLoader from matplotlib.ticker import AutoMinorLocator, MultipleLocator +from models import GAT from ogb.nodeproppred import DglNodePropPredDataset, Evaluator from torch import nn -from models import GAT +import dgl +import dgl.function as fn +from dgl.dataloading import ( + DataLoader, + MultiLayerFullNeighborSampler, + MultiLayerNeighborSampler, +) device = None dataset = "ogbn-proteins" @@ -43,7 +46,11 @@ def load_data(dataset): evaluator = Evaluator(name=dataset) splitted_idx = data.get_idx_split() - train_idx, val_idx, test_idx = splitted_idx["train"], splitted_idx["valid"], splitted_idx["test"] + train_idx, val_idx, test_idx = ( + splitted_idx["train"], + splitted_idx["valid"], + 
splitted_idx["test"], + ) graph, labels = data[0] graph.ndata["labels"] = labels @@ -54,11 +61,15 @@ def preprocess(graph, labels, train_idx): global n_node_feats # The sum of the weights of adjacent edges is used as node features. - graph.update_all(fn.copy_e("feat", "feat_copy"), fn.sum("feat_copy", "feat")) + graph.update_all( + fn.copy_e("feat", "feat_copy"), fn.sum("feat_copy", "feat") + ) n_node_feats = graph.ndata["feat"].shape[-1] # Only the labels in the training set are used as features, while others are filled with zeros. - graph.ndata["train_labels_onehot"] = torch.zeros(graph.number_of_nodes(), n_classes) + graph.ndata["train_labels_onehot"] = torch.zeros( + graph.number_of_nodes(), n_classes + ) graph.ndata["train_labels_onehot"][train_idx, labels[train_idx, 0]] = 1 graph.ndata["deg"] = graph.out_degrees().float().clamp(min=1) @@ -99,7 +110,16 @@ def add_labels(graph, idx): graph.srcdata["feat"] = torch.cat([feat, train_labels_onehot], dim=-1) -def train(args, model, dataloader, _labels, _train_idx, criterion, optimizer, _evaluator): +def train( + args, + model, + dataloader, + _labels, + _train_idx, + criterion, + optimizer, + _evaluator, +): model.train() loss_sum, total = 0, 0 @@ -109,7 +129,9 @@ def train(args, model, dataloader, _labels, _train_idx, criterion, optimizer, _e new_train_idx = torch.arange(len(output_nodes), device=device) if args.use_labels: - train_labels_idx = torch.arange(len(output_nodes), len(input_nodes), device=device) + train_labels_idx = torch.arange( + len(output_nodes), len(input_nodes), device=device + ) train_pred_idx = new_train_idx add_labels(subgraphs[0], train_labels_idx) @@ -117,7 +139,10 @@ def train(args, model, dataloader, _labels, _train_idx, criterion, optimizer, _e train_pred_idx = new_train_idx pred = model(subgraphs) - loss = criterion(pred[train_pred_idx], subgraphs[-1].dstdata["labels"][train_pred_idx].float()) + loss = criterion( + pred[train_pred_idx], + subgraphs[-1].dstdata["labels"][train_pred_idx].float(), + ) optimizer.zero_grad() loss.backward() optimizer.step() @@ -132,7 +157,17 @@ def train(args, model, dataloader, _labels, _train_idx, criterion, optimizer, _e @torch.no_grad() -def evaluate(args, model, dataloader, labels, train_idx, val_idx, test_idx, criterion, evaluator): +def evaluate( + args, + model, + dataloader, + labels, + train_idx, + val_idx, + test_idx, + criterion, + evaluator, +): model.eval() preds = torch.zeros(labels.shape).to(device) @@ -170,37 +205,49 @@ def evaluate(args, model, dataloader, labels, train_idx, val_idx, test_idx, crit ) -def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running): - evaluator_wrapper = lambda pred, labels: evaluator.eval({"y_pred": pred, "y_true": labels})["rocauc"] +def run( + args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running +): + evaluator_wrapper = lambda pred, labels: evaluator.eval( + {"y_pred": pred, "y_true": labels} + )["rocauc"] train_batch_size = (len(train_idx) + 9) // 10 # batch_size = len(train_idx) - train_sampler = MultiLayerNeighborSampler([32 for _ in range(args.n_layers)]) + train_sampler = MultiLayerNeighborSampler( + [32 for _ in range(args.n_layers)] + ) # sampler = MultiLayerFullNeighborSampler(args.n_layers) train_dataloader = DataLoader( graph.cpu(), train_idx.cpu(), train_sampler, batch_size=train_batch_size, - num_workers=10, + num_workers=10, ) - eval_sampler = MultiLayerNeighborSampler([100 for _ in range(args.n_layers)]) + eval_sampler = MultiLayerNeighborSampler( + [100 for _ in 
range(args.n_layers)] + ) # sampler = MultiLayerFullNeighborSampler(args.n_layers) - eval_dataloader = DataLoader( + eval_dataloader = DataLoader( graph.cpu(), torch.cat([train_idx.cpu(), val_idx.cpu(), test_idx.cpu()]), eval_sampler, batch_size=65536, - num_workers=10, + num_workers=10, ) criterion = nn.BCEWithLogitsLoss() model = gen_model(args).to(device) - optimizer = optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.wd) - lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="max", factor=0.75, patience=50, verbose=True) + optimizer = optim.AdamW( + model.parameters(), lr=args.lr, weight_decay=args.wd + ) + lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau( + optimizer, mode="max", factor=0.75, patience=50, verbose=True + ) total_time = 0 val_score, best_val_score, final_test_score = 0, 0, 0 @@ -212,14 +259,43 @@ def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running) for epoch in range(1, args.n_epochs + 1): tic = time.time() - loss = train(args, model, train_dataloader, labels, train_idx, criterion, optimizer, evaluator_wrapper) + loss = train( + args, + model, + train_dataloader, + labels, + train_idx, + criterion, + optimizer, + evaluator_wrapper, + ) toc = time.time() total_time += toc - tic - if epoch == args.n_epochs or epoch % args.eval_every == 0 or epoch % args.log_every == 0: - train_score, val_score, test_score, train_loss, val_loss, test_loss, pred = evaluate( - args, model, eval_dataloader, labels, train_idx, val_idx, test_idx, criterion, evaluator_wrapper + if ( + epoch == args.n_epochs + or epoch % args.eval_every == 0 + or epoch % args.log_every == 0 + ): + ( + train_score, + val_score, + test_score, + train_loss, + val_loss, + test_loss, + pred, + ) = evaluate( + args, + model, + eval_dataloader, + labels, + train_idx, + val_idx, + test_idx, + criterion, + evaluator_wrapper, ) if val_score > best_val_score: @@ -238,15 +314,33 @@ def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running) ) for l, e in zip( - [train_scores, val_scores, test_scores, losses, train_losses, val_losses, test_losses], - [train_score, val_score, test_score, loss, train_loss, val_loss, test_loss], + [ + train_scores, + val_scores, + test_scores, + losses, + train_losses, + val_losses, + test_losses, + ], + [ + train_score, + val_score, + test_score, + loss, + train_loss, + val_loss, + test_loss, + ], ): l.append(e) lr_scheduler.step(val_score) print("*" * 50) - print(f"Best val score: {best_val_score}, Final test score: {final_test_score}") + print( + f"Best val score: {best_val_score}, Final test score: {final_test_score}" + ) print("*" * 50) if args.plot: @@ -255,8 +349,16 @@ def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running) ax.set_xticks(np.arange(0, args.n_epochs, 100)) ax.set_yticks(np.linspace(0, 1.0, 101)) ax.tick_params(labeltop=True, labelright=True) - for y, label in zip([train_scores, val_scores, test_scores], ["train score", "val score", "test score"]): - plt.plot(range(1, args.n_epochs + 1, args.log_every), y, label=label, linewidth=1) + for y, label in zip( + [train_scores, val_scores, test_scores], + ["train score", "val score", "test score"], + ): + plt.plot( + range(1, args.n_epochs + 1, args.log_every), + y, + label=label, + linewidth=1, + ) ax.xaxis.set_major_locator(MultipleLocator(100)) ax.xaxis.set_minor_locator(AutoMinorLocator(1)) ax.yaxis.set_major_locator(MultipleLocator(0.01)) @@ -272,9 +374,15 @@ def run(args, graph, labels, train_idx, val_idx, test_idx, 
evaluator, n_running) ax.set_xticks(np.arange(0, args.n_epochs, 100)) ax.tick_params(labeltop=True, labelright=True) for y, label in zip( - [losses, train_losses, val_losses, test_losses], ["loss", "train loss", "val loss", "test loss"] + [losses, train_losses, val_losses, test_losses], + ["loss", "train loss", "val loss", "test loss"], ): - plt.plot(range(1, args.n_epochs + 1, args.log_every), y, label=label, linewidth=1) + plt.plot( + range(1, args.n_epochs + 1, args.log_every), + y, + label=label, + linewidth=1, + ) ax.xaxis.set_major_locator(MultipleLocator(100)) ax.xaxis.set_minor_locator(AutoMinorLocator(1)) ax.yaxis.set_major_locator(MultipleLocator(0.1)) @@ -294,37 +402,79 @@ def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running) def count_parameters(args): model = gen_model(args) - return sum([np.prod(p.size()) for p in model.parameters() if p.requires_grad]) + return sum( + [np.prod(p.size()) for p in model.parameters() if p.requires_grad] + ) def main(): global device argparser = argparse.ArgumentParser( - "GAT implementation on ogbn-proteins", formatter_class=argparse.ArgumentDefaultsHelpFormatter + "GAT implementation on ogbn-proteins", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + argparser.add_argument( + "--cpu", + action="store_true", + help="CPU mode. This option overrides '--gpu'.", ) - argparser.add_argument("--cpu", action="store_true", help="CPU mode. This option overrides '--gpu'.") argparser.add_argument("--gpu", type=int, default=0, help="GPU device ID") argparser.add_argument("--seed", type=int, default=0, help="random seed") - argparser.add_argument("--n-runs", type=int, default=10, help="running times") - argparser.add_argument("--n-epochs", type=int, default=1200, help="number of epochs") argparser.add_argument( - "--use-labels", action="store_true", help="Use labels in the training set as input features." - ) - argparser.add_argument("--no-attn-dst", action="store_true", help="Don't use attn_dst.") - argparser.add_argument("--n-heads", type=int, default=6, help="number of heads") - argparser.add_argument("--lr", type=float, default=0.01, help="learning rate") - argparser.add_argument("--n-layers", type=int, default=6, help="number of layers") - argparser.add_argument("--n-hidden", type=int, default=80, help="number of hidden units") - argparser.add_argument("--dropout", type=float, default=0.25, help="dropout rate") - argparser.add_argument("--input-drop", type=float, default=0.1, help="input drop rate") - argparser.add_argument("--attn-drop", type=float, default=0.0, help="attention dropout rate") - argparser.add_argument("--edge-drop", type=float, default=0.1, help="edge drop rate") + "--n-runs", type=int, default=10, help="running times" + ) + argparser.add_argument( + "--n-epochs", type=int, default=1200, help="number of epochs" + ) + argparser.add_argument( + "--use-labels", + action="store_true", + help="Use labels in the training set as input features.", + ) + argparser.add_argument( + "--no-attn-dst", action="store_true", help="Don't use attn_dst." 
+ ) + argparser.add_argument( + "--n-heads", type=int, default=6, help="number of heads" + ) + argparser.add_argument( + "--lr", type=float, default=0.01, help="learning rate" + ) + argparser.add_argument( + "--n-layers", type=int, default=6, help="number of layers" + ) + argparser.add_argument( + "--n-hidden", type=int, default=80, help="number of hidden units" + ) + argparser.add_argument( + "--dropout", type=float, default=0.25, help="dropout rate" + ) + argparser.add_argument( + "--input-drop", type=float, default=0.1, help="input drop rate" + ) + argparser.add_argument( + "--attn-drop", type=float, default=0.0, help="attention dropout rate" + ) + argparser.add_argument( + "--edge-drop", type=float, default=0.1, help="edge drop rate" + ) argparser.add_argument("--wd", type=float, default=0, help="weight decay") - argparser.add_argument("--eval-every", type=int, default=5, help="evaluate every EVAL_EVERY epochs") - argparser.add_argument("--log-every", type=int, default=5, help="log every LOG_EVERY epochs") - argparser.add_argument("--plot", action="store_true", help="plot learning curves") - argparser.add_argument("--save-pred", action="store_true", help="save final predictions") + argparser.add_argument( + "--eval-every", + type=int, + default=5, + help="evaluate every EVAL_EVERY epochs", + ) + argparser.add_argument( + "--log-every", type=int, default=5, help="log every LOG_EVERY epochs" + ) + argparser.add_argument( + "--plot", action="store_true", help="plot learning curves" + ) + argparser.add_argument( + "--save-pred", action="store_true", help="save final predictions" + ) args = argparser.parse_args() if args.cpu: @@ -338,7 +488,9 @@ def main(): print("Preprocessing") graph, labels = preprocess(graph, labels, train_idx) - labels, train_idx, val_idx, test_idx = map(lambda x: x.to(device), (labels, train_idx, val_idx, test_idx)) + labels, train_idx, val_idx, test_idx = map( + lambda x: x.to(device), (labels, train_idx, val_idx, test_idx) + ) # run val_scores, test_scores = [], [] @@ -346,7 +498,9 @@ def main(): for i in range(args.n_runs): print("Running", i) seed(args.seed + i) - val_score, test_score = run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, i + 1) + val_score, test_score = run( + args, graph, labels, train_idx, val_idx, test_idx, evaluator, i + 1 + ) val_scores.append(val_score) test_scores.append(test_score) diff --git a/examples/pytorch/ogb/ogbn-proteins/main_proteins_full_dgl.py b/examples/pytorch/ogb/ogbn-proteins/main_proteins_full_dgl.py index ce134fa986c1..930a611e2620 100644 --- a/examples/pytorch/ogb/ogbn-proteins/main_proteins_full_dgl.py +++ b/examples/pytorch/ogb/ogbn-proteins/main_proteins_full_dgl.py @@ -1,7 +1,6 @@ import os import time -import dgl.function as fn import numpy as np import torch import torch.nn as nn @@ -11,9 +10,10 @@ from torch.optim import Adam from torch.optim.lr_scheduler import ReduceLROnPlateau from torch.utils.tensorboard import SummaryWriter - from utils import load_model, set_random_seed +import dgl.function as fn + def normalize_edge_weights(graph, device, num_ew_channels): degs = graph.in_degrees().float() @@ -24,7 +24,9 @@ def normalize_edge_weights(graph, device, num_ew_channels): graph.apply_edges(fn.e_div_u("feat", "norm", "feat")) graph.apply_edges(fn.e_div_v("feat", "norm", "feat")) for channel in range(num_ew_channels): - graph.edata["feat_" + str(channel)] = graph.edata["feat"][:, channel : channel + 1] + graph.edata["feat_" + str(channel)] = graph.edata["feat"][ + :, channel : channel + 1 + ] def 
run_a_train_epoch(graph, node_idx, model, criterion, optimizer, evaluator): @@ -50,9 +52,24 @@ def run_an_eval_epoch(graph, splitted_idx, model, evaluator): labels = graph.ndata["labels"].cpu().numpy() preds = logits.cpu().detach().numpy() - train_score = evaluator.eval({"y_true": labels[splitted_idx["train"]], "y_pred": preds[splitted_idx["train"]]}) - val_score = evaluator.eval({"y_true": labels[splitted_idx["valid"]], "y_pred": preds[splitted_idx["valid"]]}) - test_score = evaluator.eval({"y_true": labels[splitted_idx["test"]], "y_pred": preds[splitted_idx["test"]]}) + train_score = evaluator.eval( + { + "y_true": labels[splitted_idx["train"]], + "y_pred": preds[splitted_idx["train"]], + } + ) + val_score = evaluator.eval( + { + "y_true": labels[splitted_idx["valid"]], + "y_pred": preds[splitted_idx["valid"]], + } + ) + test_score = evaluator.eval( + { + "y_true": labels[splitted_idx["test"]], + "y_pred": preds[splitted_idx["test"]], + } + ) return train_score["rocauc"], val_score["rocauc"], test_score["rocauc"] @@ -75,12 +92,18 @@ def main(args): elif args["ewnorm"] == "none": print("Not normalizing edge weights") for channel in range(args["num_ew_channels"]): - graph.edata["feat_" + str(channel)] = graph.edata["feat"][:, channel : channel + 1] + graph.edata["feat_" + str(channel)] = graph.edata["feat"][ + :, channel : channel + 1 + ] model = load_model(args).to(args["device"]) - optimizer = Adam(model.parameters(), lr=args["lr"], weight_decay=args["weight_decay"]) + optimizer = Adam( + model.parameters(), lr=args["lr"], weight_decay=args["weight_decay"] + ) min_lr = 1e-3 - scheduler = ReduceLROnPlateau(optimizer, "max", factor=0.7, patience=100, verbose=True, min_lr=min_lr) + scheduler = ReduceLROnPlateau( + optimizer, "max", factor=0.7, patience=100, verbose=True, min_lr=min_lr + ) print("scheduler min_lr", min_lr) criterion = nn.BCEWithLogitsLoss() @@ -95,7 +118,9 @@ def main(args): best_val_score = 0.0 num_patient_epochs = 0 model_folder = "./saved_models/" - model_path = model_folder + str(args["exp_name"]) + "_" + str(args["postfix"]) + model_path = ( + model_folder + str(args["exp_name"]) + "_" + str(args["postfix"]) + ) if not os.path.exists(model_folder): os.makedirs(model_folder) @@ -104,7 +129,9 @@ def main(args): if epoch >= 3: t0 = time.time() - loss, train_score = run_a_train_epoch(graph, splitted_idx["train"], model, criterion, optimizer, evaluator) + loss, train_score = run_a_train_epoch( + graph, splitted_idx["train"], model, criterion, optimizer, evaluator + ) if epoch >= 3: dur.append(time.time() - t0) @@ -112,7 +139,9 @@ def main(args): else: avg_time = None - train_score, val_score, test_score = run_an_eval_epoch(graph, splitted_idx, model, evaluator) + train_score, val_score, test_score = run_an_eval_epoch( + graph, splitted_idx, model, evaluator + ) scheduler.step(val_score) @@ -127,7 +156,12 @@ def main(args): print( "Epoch {:d}, loss {:.4f}, train score {:.4f}, " "val score {:.4f}, avg time {}, num patient epochs {:d}".format( - epoch, loss, train_score, val_score, avg_time, num_patient_epochs + epoch, + loss, + train_score, + val_score, + avg_time, + num_patient_epochs, ) ) @@ -135,7 +169,9 @@ def main(args): break model.load_state_dict(torch.load(model_path)) - train_score, val_score, test_score = run_an_eval_epoch(graph, splitted_idx, model, evaluator) + train_score, val_score, test_score = run_an_eval_epoch( + graph, splitted_idx, model, evaluator + ) print("Train score {:.4f}".format(train_score)) print("Valid score {:.4f}".format(val_score)) print("Test 
score {:.4f}".format(test_score)) @@ -153,15 +189,34 @@ def main(args): from configure import get_exp_configure - parser = argparse.ArgumentParser(description="OGB node property prediction with DGL using full graph training") + parser = argparse.ArgumentParser( + description="OGB node property prediction with DGL using full graph training" + ) parser.add_argument( - "-m", "--model", type=str, choices=["MWE-GCN", "MWE-DGCN"], default="MWE-DGCN", help="Model to use" + "-m", + "--model", + type=str, + choices=["MWE-GCN", "MWE-DGCN"], + default="MWE-DGCN", + help="Model to use", ) parser.add_argument("-c", "--cuda", type=str, default="none") - parser.add_argument("--postfix", type=str, default="", help="a string appended to the file name of the saved model") - parser.add_argument("--rand_seed", type=int, default=-1, help="random seed for torch and numpy") + parser.add_argument( + "--postfix", + type=str, + default="", + help="a string appended to the file name of the saved model", + ) + parser.add_argument( + "--rand_seed", + type=int, + default=-1, + help="random seed for torch and numpy", + ) parser.add_argument("--residual", action="store_true") - parser.add_argument("--ewnorm", type=str, default="none", choices=["none", "both"]) + parser.add_argument( + "--ewnorm", type=str, default="none", choices=["none", "both"] + ) args = parser.parse_args().__dict__ # Get experiment configuration diff --git a/examples/pytorch/ogb/ogbn-proteins/utils.py b/examples/pytorch/ogb/ogbn-proteins/utils.py index 13e14c2f7d1a..c709c37d8a2d 100644 --- a/examples/pytorch/ogb/ogbn-proteins/utils.py +++ b/examples/pytorch/ogb/ogbn-proteins/utils.py @@ -3,7 +3,6 @@ import numpy as np import torch import torch.nn.functional as F - from models import MWE_DGCN, MWE_GCN @@ -87,4 +86,3 @@ def print_statistics(self, run=None): print(f" Final Train: {r.mean():.2f} ± {r.std():.2f}") r = best_result[:, 3] print(f" Final Test: {r.mean():.2f} ± {r.std():.2f}") - diff --git a/examples/pytorch/ogb/seal_ogbl/main.py b/examples/pytorch/ogb/seal_ogbl/main.py index c9b9b6ee9ee3..2935b0f95b8a 100644 --- a/examples/pytorch/ogb/seal_ogbl/main.py +++ b/examples/pytorch/ogb/seal_ogbl/main.py @@ -1,20 +1,29 @@ import argparse -import time -import os -import sys import math +import os import random -from tqdm import tqdm +import sys +import time + import numpy as np import torch -from torch.nn import ModuleList, Linear, Conv1d, MaxPool1d, Embedding, BCEWithLogitsLoss import torch.nn.functional as F +from ogb.linkproppred import DglLinkPropPredDataset, Evaluator +from scipy.sparse.csgraph import shortest_path +from torch.nn import ( + BCEWithLogitsLoss, + Conv1d, + Embedding, + Linear, + MaxPool1d, + ModuleList, +) +from tqdm import tqdm + import dgl +from dgl.dataloading import DataLoader, Sampler from dgl.nn import GraphConv, SortPooling from dgl.sampling import global_uniform_negative_sampling -from dgl.dataloading import Sampler, DataLoader -from ogb.linkproppred import DglLinkPropPredDataset, Evaluator -from scipy.sparse.csgraph import shortest_path class Logger(object): @@ -32,10 +41,10 @@ def print_statistics(self, run=None, f=sys.stdout): if run is not None: result = 100 * torch.tensor(self.results[run]) argmax = result[:, 0].argmax().item() - print(f'Run {run + 1:02d}:', file=f) - print(f'Highest Valid: {result[:, 0].max():.2f}', file=f) - print(f'Highest Eval Point: {argmax + 1}', file=f) - print(f' Final Test: {result[argmax, 1]:.2f}', file=f) + print(f"Run {run + 1:02d}:", file=f) + print(f"Highest Valid: {result[:, 
0].max():.2f}", file=f) + print(f"Highest Eval Point: {argmax + 1}", file=f) + print(f" Final Test: {result[argmax, 1]:.2f}", file=f) else: result = 100 * torch.tensor(self.results) @@ -47,16 +56,23 @@ def print_statistics(self, run=None, f=sys.stdout): best_result = torch.tensor(best_results) - print(f'All runs:', file=f) + print(f"All runs:", file=f) r = best_result[:, 0] - print(f'Highest Valid: {r.mean():.2f} ± {r.std():.2f}', file=f) + print(f"Highest Valid: {r.mean():.2f} ± {r.std():.2f}", file=f) r = best_result[:, 1] - print(f' Final Test: {r.mean():.2f} ± {r.std():.2f}', file=f) + print(f" Final Test: {r.mean():.2f} ± {r.std():.2f}", file=f) class SealSampler(Sampler): - def __init__(self, g, num_hops=1, sample_ratio=1., directed=False, - prefetch_node_feats=None, prefetch_edge_feats=None): + def __init__( + self, + g, + num_hops=1, + sample_ratio=1.0, + directed=False, + prefetch_node_feats=None, + prefetch_edge_feats=None, + ): super().__init__() self.g = g self.num_hops = num_hops @@ -71,22 +87,29 @@ def _double_radius_node_labeling(self, adj): idx = list(range(1)) + list(range(2, N)) adj_wo_dst = adj[idx, :][:, idx] - dist2src = shortest_path(adj_wo_dst, directed=False, unweighted=True, indices=0) + dist2src = shortest_path( + adj_wo_dst, directed=False, unweighted=True, indices=0 + ) dist2src = np.insert(dist2src, 1, 0, axis=0) dist2src = torch.from_numpy(dist2src) - dist2dst = shortest_path(adj_wo_src, directed=False, unweighted=True, indices=0) + dist2dst = shortest_path( + adj_wo_src, directed=False, unweighted=True, indices=0 + ) dist2dst = np.insert(dist2dst, 0, 0, axis=0) dist2dst = torch.from_numpy(dist2dst) dist = dist2src + dist2dst - dist_over_2, dist_mod_2 = torch.div(dist, 2, rounding_mode='floor'), dist % 2 + dist_over_2, dist_mod_2 = ( + torch.div(dist, 2, rounding_mode="floor"), + dist % 2, + ) z = 1 + torch.min(dist2src, dist2dst) z += dist_over_2 * (dist_over_2 + dist_mod_2 - 1) - z[0: 2] = 1. + z[0:2] = 1.0 # shortest path may include inf values - z[torch.isnan(z)] = 0. 
+ z[torch.isnan(z)] = 0.0 return z.to(torch.long) @@ -107,9 +130,12 @@ def sample(self, aug_g, seed_edges): fringe = np.union1d(in_neighbors, out_neighbors) fringe = np.setdiff1d(fringe, visited) visited = np.union1d(visited, fringe) - if self.sample_ratio < 1.: - fringe = np.random.choice(fringe, - int(self.sample_ratio * len(fringe)), replace=False) + if self.sample_ratio < 1.0: + fringe = np.random.choice( + fringe, + int(self.sample_ratio * len(fringe)), + replace=False, + ) if len(fringe) == 0: break nodes = np.union1d(nodes, fringe) @@ -117,26 +143,34 @@ def sample(self, aug_g, seed_edges): # remove edges to predict edges_to_remove = [ - subg.edge_ids(s, t) for s, t in [(0, 1), (1, 0)] if subg.has_edges_between(s, t)] + subg.edge_ids(s, t) + for s, t in [(0, 1), (1, 0)] + if subg.has_edges_between(s, t) + ] subg.remove_edges(edges_to_remove) # add double radius node labeling - subg.ndata['z'] = self._double_radius_node_labeling(subg.adj(scipy_fmt='csr')) + subg.ndata["z"] = self._double_radius_node_labeling( + subg.adj(scipy_fmt="csr") + ) subg_aug = subg.add_self_loop() - if 'weight' in subg.edata: - subg_aug.edata['weight'][subg.num_edges():] = torch.ones( - subg_aug.num_edges() - subg.num_edges()) + if "weight" in subg.edata: + subg_aug.edata["weight"][subg.num_edges() :] = torch.ones( + subg_aug.num_edges() - subg.num_edges() + ) subgraphs.append(subg_aug) subgraphs = dgl.batch(subgraphs) dgl.set_src_lazy_features(subg_aug, self.prefetch_node_feats) dgl.set_edge_lazy_features(subg_aug, self.prefetch_edge_feats) - return subgraphs, aug_g.edata['y'][seed_edges] + return subgraphs, aug_g.edata["y"][seed_edges] # An end-to-end deep learning architecture for graph classification, AAAI-18. class DGCNN(torch.nn.Module): - def __init__(self, hidden_channels, num_layers, k, GNN=GraphConv, feature_dim=0): + def __init__( + self, hidden_channels, num_layers, k, GNN=GraphConv, feature_dim=0 + ): super(DGCNN, self).__init__() self.feature_dim = feature_dim self.k = k @@ -149,18 +183,18 @@ def __init__(self, hidden_channels, num_layers, k, GNN=GraphConv, feature_dim=0) initial_channels = hidden_channels + self.feature_dim self.convs.append(GNN(initial_channels, hidden_channels)) - for _ in range(0, num_layers-1): + for _ in range(0, num_layers - 1): self.convs.append(GNN(hidden_channels, hidden_channels)) self.convs.append(GNN(hidden_channels, 1)) conv1d_channels = [16, 32] total_latent_dim = hidden_channels * num_layers + 1 conv1d_kws = [total_latent_dim, 5] - self.conv1 = Conv1d(1, conv1d_channels[0], conv1d_kws[0], - conv1d_kws[0]) + self.conv1 = Conv1d(1, conv1d_channels[0], conv1d_kws[0], conv1d_kws[0]) self.maxpool1d = MaxPool1d(2, 2) - self.conv2 = Conv1d(conv1d_channels[0], conv1d_channels[1], - conv1d_kws[1], 1) + self.conv2 = Conv1d( + conv1d_channels[0], conv1d_channels[1], conv1d_kws[1], 1 + ) dense_dim = int((self.k - 2) / 2 + 1) dense_dim = (dense_dim - conv1d_kws[1] + 1) * conv1d_channels[1] self.lin1 = Linear(dense_dim, 128) @@ -196,33 +230,35 @@ def forward(self, g, z, x=None, edge_weight=None): def get_pos_neg_edges(split, split_edge, g, percent=100): - pos_edge = split_edge[split]['edge'] - if split == 'train': - neg_edge = torch.stack(global_uniform_negative_sampling( - g, num_samples=pos_edge.size(0), - exclude_self_loops=True - ), dim=1) + pos_edge = split_edge[split]["edge"] + if split == "train": + neg_edge = torch.stack( + global_uniform_negative_sampling( + g, num_samples=pos_edge.size(0), exclude_self_loops=True + ), + dim=1, + ) else: - neg_edge = 
split_edge[split]['edge_neg'] + neg_edge = split_edge[split]["edge_neg"] # sampling according to the percent param np.random.seed(123) # pos sampling num_pos = pos_edge.size(0) perm = np.random.permutation(num_pos) - perm = perm[:int(percent / 100 * num_pos)] + perm = perm[: int(percent / 100 * num_pos)] pos_edge = pos_edge[perm] # neg sampling - if neg_edge.dim() > 2: # [Np, Nn, 2] + if neg_edge.dim() > 2: # [Np, Nn, 2] neg_edge = neg_edge[perm].view(-1, 2) else: np.random.seed(123) num_neg = neg_edge.size(0) perm = np.random.permutation(num_neg) - perm = perm[:int(percent / 100 * num_neg)] + perm = perm[: int(percent / 100 * num_neg)] neg_edge = neg_edge[perm] - return pos_edge, neg_edge # ([2, Np], [2, Nn]) -> ([Np, 2], [Nn, 2]) + return pos_edge, neg_edge # ([2, Np], [2, Nn]) -> ([Np, 2], [Nn, 2]) def train(): @@ -233,8 +269,12 @@ def train(): pbar = tqdm(train_loader, ncols=70) for gs, y in pbar: optimizer.zero_grad() - logits = model(gs, gs.ndata['z'], gs.ndata.get('feat', None), - edge_weight=gs.edata.get('weight', None)) + logits = model( + gs, + gs.ndata["z"], + gs.ndata.get("feat", None), + edge_weight=gs.edata.get("weight", None), + ) loss = loss_fnt(logits.view(-1), y.to(torch.float)) loss.backward() optimizer.step() @@ -250,28 +290,40 @@ def test(): y_pred, y_true = [], [] for gs, y in tqdm(val_loader, ncols=70): - logits = model(gs, gs.ndata['z'], gs.ndata.get('feat', None), - edge_weight=gs.edata.get('weight', None)) + logits = model( + gs, + gs.ndata["z"], + gs.ndata.get("feat", None), + edge_weight=gs.edata.get("weight", None), + ) y_pred.append(logits.view(-1).cpu()) y_true.append(y.view(-1).cpu().to(torch.float)) val_pred, val_true = torch.cat(y_pred), torch.cat(y_true) - pos_val_pred = val_pred[val_true==1] - neg_val_pred = val_pred[val_true==0] + pos_val_pred = val_pred[val_true == 1] + neg_val_pred = val_pred[val_true == 0] y_pred, y_true = [], [] for gs, y in tqdm(test_loader, ncols=70): - logits = model(gs, gs.ndata['z'], gs.ndata.get('feat', None), - edge_weight=gs.edata.get('weight', None)) + logits = model( + gs, + gs.ndata["z"], + gs.ndata.get("feat", None), + edge_weight=gs.edata.get("weight", None), + ) y_pred.append(logits.view(-1).cpu()) y_true.append(y.view(-1).cpu().to(torch.float)) test_pred, test_true = torch.cat(y_pred), torch.cat(y_true) - pos_test_pred = test_pred[test_true==1] - neg_test_pred = test_pred[test_true==0] - - if args.eval_metric == 'hits': - results = evaluate_hits(pos_val_pred, neg_val_pred, pos_test_pred, neg_test_pred) - elif args.eval_metric == 'mrr': - results = evaluate_mrr(pos_val_pred, neg_val_pred, pos_test_pred, neg_test_pred) + pos_test_pred = test_pred[test_true == 1] + neg_test_pred = test_pred[test_true == 0] + + if args.eval_metric == "hits": + results = evaluate_hits( + pos_val_pred, neg_val_pred, pos_test_pred, neg_test_pred + ) + elif args.eval_metric == "mrr": + results = evaluate_mrr( + pos_val_pred, neg_val_pred, pos_test_pred, neg_test_pred + ) return results @@ -280,184 +332,254 @@ def evaluate_hits(pos_val_pred, neg_val_pred, pos_test_pred, neg_test_pred): results = {} for K in [20, 50, 100]: evaluator.K = K - valid_hits = evaluator.eval({ - 'y_pred_pos': pos_val_pred, - 'y_pred_neg': neg_val_pred, - })[f'hits@{K}'] - test_hits = evaluator.eval({ - 'y_pred_pos': pos_test_pred, - 'y_pred_neg': neg_test_pred, - })[f'hits@{K}'] - - results[f'Hits@{K}'] = (valid_hits, test_hits) + valid_hits = evaluator.eval( + { + "y_pred_pos": pos_val_pred, + "y_pred_neg": neg_val_pred, + } + )[f"hits@{K}"] + test_hits = 
evaluator.eval( + { + "y_pred_pos": pos_test_pred, + "y_pred_neg": neg_test_pred, + } + )[f"hits@{K}"] + + results[f"Hits@{K}"] = (valid_hits, test_hits) return results - + def evaluate_mrr(pos_val_pred, neg_val_pred, pos_test_pred, neg_test_pred): - print(pos_val_pred.size(), neg_val_pred.size(), pos_test_pred.size(), neg_test_pred.size()) + print( + pos_val_pred.size(), + neg_val_pred.size(), + pos_test_pred.size(), + neg_test_pred.size(), + ) neg_val_pred = neg_val_pred.view(pos_val_pred.shape[0], -1) neg_test_pred = neg_test_pred.view(pos_test_pred.shape[0], -1) results = {} - valid_mrr = evaluator.eval({ - 'y_pred_pos': pos_val_pred, - 'y_pred_neg': neg_val_pred, - })['mrr_list'].mean().item() - - test_mrr = evaluator.eval({ - 'y_pred_pos': pos_test_pred, - 'y_pred_neg': neg_test_pred, - })['mrr_list'].mean().item() - - results['MRR'] = (valid_mrr, test_mrr) - + valid_mrr = ( + evaluator.eval( + { + "y_pred_pos": pos_val_pred, + "y_pred_neg": neg_val_pred, + } + )["mrr_list"] + .mean() + .item() + ) + + test_mrr = ( + evaluator.eval( + { + "y_pred_pos": pos_test_pred, + "y_pred_neg": neg_test_pred, + } + )["mrr_list"] + .mean() + .item() + ) + + results["MRR"] = (valid_mrr, test_mrr) + return results -if __name__ == '__main__': +if __name__ == "__main__": # Data settings - parser = argparse.ArgumentParser(description='OGBL (SEAL)') - parser.add_argument('--dataset', type=str, default='ogbl-collab') + parser = argparse.ArgumentParser(description="OGBL (SEAL)") + parser.add_argument("--dataset", type=str, default="ogbl-collab") # GNN settings - parser.add_argument('--sortpool_k', type=float, default=0.6) - parser.add_argument('--num_layers', type=int, default=3) - parser.add_argument('--hidden_channels', type=int, default=32) - parser.add_argument('--batch_size', type=int, default=32) + parser.add_argument("--sortpool_k", type=float, default=0.6) + parser.add_argument("--num_layers", type=int, default=3) + parser.add_argument("--hidden_channels", type=int, default=32) + parser.add_argument("--batch_size", type=int, default=32) # Subgraph extraction settings - parser.add_argument('--ratio_per_hop', type=float, default=1.0) - parser.add_argument('--use_feature', action='store_true', - help="whether to use raw node features as GNN input") - parser.add_argument('--use_edge_weight', action='store_true', - help="whether to consider edge weight in GNN") + parser.add_argument("--ratio_per_hop", type=float, default=1.0) + parser.add_argument( + "--use_feature", + action="store_true", + help="whether to use raw node features as GNN input", + ) + parser.add_argument( + "--use_edge_weight", + action="store_true", + help="whether to consider edge weight in GNN", + ) # Training settings - parser.add_argument('--lr', type=float, default=0.0001) - parser.add_argument('--epochs', type=int, default=50) - parser.add_argument('--runs', type=int, default=10) - parser.add_argument('--train_percent', type=float, default=100) - parser.add_argument('--val_percent', type=float, default=100) - parser.add_argument('--test_percent', type=float, default=100) - parser.add_argument('--num_workers', type=int, default=8, - help="number of workers for dynamic dataloaders") + parser.add_argument("--lr", type=float, default=0.0001) + parser.add_argument("--epochs", type=int, default=50) + parser.add_argument("--runs", type=int, default=10) + parser.add_argument("--train_percent", type=float, default=100) + parser.add_argument("--val_percent", type=float, default=100) + parser.add_argument("--test_percent", type=float, 
default=100) + parser.add_argument( + "--num_workers", + type=int, + default=8, + help="number of workers for dynamic dataloaders", + ) # Testing settings - parser.add_argument('--use_valedges_as_input', action='store_true') - parser.add_argument('--eval_steps', type=int, default=1) + parser.add_argument("--use_valedges_as_input", action="store_true") + parser.add_argument("--eval_steps", type=int, default=1) args = parser.parse_args() - data_appendix = '_rph{}'.format(''.join(str(args.ratio_per_hop).split('.'))) + data_appendix = "_rph{}".format("".join(str(args.ratio_per_hop).split("."))) if args.use_valedges_as_input: - data_appendix += '_uvai' + data_appendix += "_uvai" - args.res_dir = os.path.join('results/{}_{}'.format(args.dataset, - time.strftime("%Y%m%d%H%M%S"))) - print('Results will be saved in ' + args.res_dir) + args.res_dir = os.path.join( + "results/{}_{}".format(args.dataset, time.strftime("%Y%m%d%H%M%S")) + ) + print("Results will be saved in " + args.res_dir) if not os.path.exists(args.res_dir): - os.makedirs(args.res_dir) - log_file = os.path.join(args.res_dir, 'log.txt') + os.makedirs(args.res_dir) + log_file = os.path.join(args.res_dir, "log.txt") # Save command line input. - cmd_input = 'python ' + ' '.join(sys.argv) + '\n' - with open(os.path.join(args.res_dir, 'cmd_input.txt'), 'a') as f: + cmd_input = "python " + " ".join(sys.argv) + "\n" + with open(os.path.join(args.res_dir, "cmd_input.txt"), "a") as f: f.write(cmd_input) - print('Command line input: ' + cmd_input + ' is saved.') - with open(log_file, 'a') as f: - f.write('\n' + cmd_input) + print("Command line input: " + cmd_input + " is saved.") + with open(log_file, "a") as f: + f.write("\n" + cmd_input) dataset = DglLinkPropPredDataset(name=args.dataset) split_edge = dataset.get_edge_split() graph = dataset[0] # re-format the data of citation2 - if args.dataset == 'ogbl-citation2': - for k in ['train', 'valid', 'test']: - src = split_edge[k]['source_node'] - tgt = split_edge[k]['target_node'] - split_edge[k]['edge'] = torch.stack([src, tgt], dim=1) - if k != 'train': - tgt_neg = split_edge[k]['target_node_neg'] - split_edge[k]['edge_neg'] = torch.stack([ - src[:, None].repeat(1, tgt_neg.size(1)), - tgt_neg - ], dim=-1) # [Ns, Nt, 2] + if args.dataset == "ogbl-citation2": + for k in ["train", "valid", "test"]: + src = split_edge[k]["source_node"] + tgt = split_edge[k]["target_node"] + split_edge[k]["edge"] = torch.stack([src, tgt], dim=1) + if k != "train": + tgt_neg = split_edge[k]["target_node_neg"] + split_edge[k]["edge_neg"] = torch.stack( + [src[:, None].repeat(1, tgt_neg.size(1)), tgt_neg], dim=-1 + ) # [Ns, Nt, 2] # reconstruct the graph for ogbl-collab data for validation edge augmentation and coalesce - if args.dataset == 'ogbl-collab': + if args.dataset == "ogbl-collab": if args.use_valedges_as_input: - val_edges = split_edge['valid']['edge'] + val_edges = split_edge["valid"]["edge"] row, col = val_edges.t() # float edata for to_simple transform - graph.edata.pop('year') - graph.edata['weight'] = graph.edata['weight'].to(torch.float) + graph.edata.pop("year") + graph.edata["weight"] = graph.edata["weight"].to(torch.float) val_weights = torch.ones(size=(val_edges.size(0), 1)) - graph.add_edges(torch.cat([row, col]), torch.cat([col, row]), {'weight': val_weights}) - graph = graph.to_simple(copy_edata=True, aggregator='sum') - - if not args.use_edge_weight and 'weight' in graph.edata: - graph.edata.pop('weight') - if not args.use_feature and 'feat' in graph.ndata: - graph.ndata.pop('feat') - - if 
args.dataset.startswith('ogbl-citation'): - args.eval_metric = 'mrr' + graph.add_edges( + torch.cat([row, col]), + torch.cat([col, row]), + {"weight": val_weights}, + ) + graph = graph.to_simple(copy_edata=True, aggregator="sum") + + if not args.use_edge_weight and "weight" in graph.edata: + graph.edata.pop("weight") + if not args.use_feature and "feat" in graph.ndata: + graph.ndata.pop("feat") + + if args.dataset.startswith("ogbl-citation"): + args.eval_metric = "mrr" directed = True else: - args.eval_metric = 'hits' + args.eval_metric = "hits" directed = False evaluator = Evaluator(name=args.dataset) - if args.eval_metric == 'hits': + if args.eval_metric == "hits": loggers = { - 'Hits@20': Logger(args.runs, args), - 'Hits@50': Logger(args.runs, args), - 'Hits@100': Logger(args.runs, args), + "Hits@20": Logger(args.runs, args), + "Hits@50": Logger(args.runs, args), + "Hits@100": Logger(args.runs, args), } - elif args.eval_metric == 'mrr': + elif args.eval_metric == "mrr": loggers = { - 'MRR': Logger(args.runs, args), + "MRR": Logger(args.runs, args), } - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - path = dataset.root + '_seal{}'.format(data_appendix) + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + path = dataset.root + "_seal{}".format(data_appendix) loaders = [] - prefetch_node_feats = ['feat'] if 'feat' in graph.ndata else None - prefetch_edge_feats = ['weight'] if 'weight' in graph.edata else None - - train_edge, train_edge_neg = get_pos_neg_edges('train', split_edge, graph, args.train_percent) - val_edge, val_edge_neg = get_pos_neg_edges('valid', split_edge, graph, args.val_percent) - test_edge, test_edge_neg = get_pos_neg_edges('test', split_edge, graph, args.test_percent) + prefetch_node_feats = ["feat"] if "feat" in graph.ndata else None + prefetch_edge_feats = ["weight"] if "weight" in graph.edata else None + + train_edge, train_edge_neg = get_pos_neg_edges( + "train", split_edge, graph, args.train_percent + ) + val_edge, val_edge_neg = get_pos_neg_edges( + "valid", split_edge, graph, args.val_percent + ) + test_edge, test_edge_neg = get_pos_neg_edges( + "test", split_edge, graph, args.test_percent + ) # create an augmented graph for sampling aug_g = dgl.graph(graph.edges()) - aug_g.edata['y'] = torch.ones(aug_g.num_edges()) - aug_edges = torch.cat([val_edge, test_edge, train_edge_neg, val_edge_neg, test_edge_neg]) - aug_labels = torch.cat([ - torch.ones(len(val_edge) + len(test_edge)), - torch.zeros(len(train_edge_neg) + len(val_edge_neg) + len(test_edge_neg)) - ]) - aug_g.add_edges(aug_edges[:, 0], aug_edges[:, 1], {'y': aug_labels}) + aug_g.edata["y"] = torch.ones(aug_g.num_edges()) + aug_edges = torch.cat( + [val_edge, test_edge, train_edge_neg, val_edge_neg, test_edge_neg] + ) + aug_labels = torch.cat( + [ + torch.ones(len(val_edge) + len(test_edge)), + torch.zeros( + len(train_edge_neg) + len(val_edge_neg) + len(test_edge_neg) + ), + ] + ) + aug_g.add_edges(aug_edges[:, 0], aug_edges[:, 1], {"y": aug_labels}) # eids for sampling - split_len = [graph.num_edges()] + \ - list(map(len, [val_edge, test_edge, train_edge_neg, val_edge_neg, test_edge_neg])) - train_eids = torch.cat([ - graph.edge_ids(train_edge[:, 0], train_edge[:, 1]), - torch.arange(sum(split_len[:3]), sum(split_len[:4])) - ]) - val_eids = torch.cat([ - torch.arange(sum(split_len[:1]), sum(split_len[:2])), - torch.arange(sum(split_len[:4]), sum(split_len[:5])) - ]) - test_eids = torch.cat([ - torch.arange(sum(split_len[:2]), sum(split_len[:3])), - 
torch.arange(sum(split_len[:5]), sum(split_len[:6])) - ]) - sampler = SealSampler(graph, 1, args.ratio_per_hop, directed, - prefetch_node_feats, prefetch_edge_feats) + split_len = [graph.num_edges()] + list( + map( + len, + [val_edge, test_edge, train_edge_neg, val_edge_neg, test_edge_neg], + ) + ) + train_eids = torch.cat( + [ + graph.edge_ids(train_edge[:, 0], train_edge[:, 1]), + torch.arange(sum(split_len[:3]), sum(split_len[:4])), + ] + ) + val_eids = torch.cat( + [ + torch.arange(sum(split_len[:1]), sum(split_len[:2])), + torch.arange(sum(split_len[:4]), sum(split_len[:5])), + ] + ) + test_eids = torch.cat( + [ + torch.arange(sum(split_len[:2]), sum(split_len[:3])), + torch.arange(sum(split_len[:5]), sum(split_len[:6])), + ] + ) + sampler = SealSampler( + graph, + 1, + args.ratio_per_hop, + directed, + prefetch_node_feats, + prefetch_edge_feats, + ) # force to be dynamic for consistent dataloading for split, shuffle, eids in zip( - ['train', 'valid', 'test'], + ["train", "valid", "test"], [True, False, False], - [train_eids, val_eids, test_eids] + [train_eids, val_eids, test_eids], ): - data_loader = DataLoader(aug_g, eids, sampler, shuffle=shuffle, device=device, - batch_size=args.batch_size, num_workers=args.num_workers) + data_loader = DataLoader( + aug_g, + eids, + sampler, + shuffle=shuffle, + device=device, + batch_size=args.batch_size, + num_workers=args.num_workers, + ) loaders.append(data_loader) train_loader, val_loader, test_loader = loaders @@ -474,16 +596,20 @@ def evaluate_mrr(pos_val_pred, neg_val_pred, pos_test_pred, neg_test_pred): k = max(k, 10) for run in range(args.runs): - model = DGCNN(args.hidden_channels, args.num_layers, k, - feature_dim=graph.ndata['feat'].size(1) if args.use_feature else 0).to(device) + model = DGCNN( + args.hidden_channels, + args.num_layers, + k, + feature_dim=graph.ndata["feat"].size(1) if args.use_feature else 0, + ).to(device) parameters = list(model.parameters()) optimizer = torch.optim.Adam(params=parameters, lr=args.lr) total_params = sum(p.numel() for param in parameters for p in param) - print(f'Total number of parameters is {total_params}') - print(f'SortPooling k is set to {k}') - with open(log_file, 'a') as f: - print(f'Total number of parameters is {total_params}', file=f) - print(f'SortPooling k is set to {k}', file=f) + print(f"Total number of parameters is {total_params}") + print(f"SortPooling k is set to {k}") + with open(log_file, "a") as f: + print(f"Total number of parameters is {total_params}", file=f) + print(f"SortPooling k is set to {k}", file=f) start_epoch = 1 # Training starts @@ -496,35 +622,41 @@ def evaluate_mrr(pos_val_pred, neg_val_pred, pos_test_pred, neg_test_pred): loggers[key].add_result(run, result) model_name = os.path.join( - args.res_dir, 'run{}_model_checkpoint{}.pth'.format(run+1, epoch)) + args.res_dir, + "run{}_model_checkpoint{}.pth".format(run + 1, epoch), + ) optimizer_name = os.path.join( - args.res_dir, 'run{}_optimizer_checkpoint{}.pth'.format(run+1, epoch)) + args.res_dir, + "run{}_optimizer_checkpoint{}.pth".format(run + 1, epoch), + ) torch.save(model.state_dict(), model_name) torch.save(optimizer.state_dict(), optimizer_name) for key, result in results.items(): valid_res, test_res = result - to_print = (f'Run: {run + 1:02d}, Epoch: {epoch:02d}, ' + - f'Loss: {loss:.4f}, Valid: {100 * valid_res:.2f}%, ' + - f'Test: {100 * test_res:.2f}%') + to_print = ( + f"Run: {run + 1:02d}, Epoch: {epoch:02d}, " + + f"Loss: {loss:.4f}, Valid: {100 * valid_res:.2f}%, " + + f"Test: {100 * 
test_res:.2f}%" + ) print(key) print(to_print) - with open(log_file, 'a') as f: + with open(log_file, "a") as f: print(key, file=f) print(to_print, file=f) for key in loggers.keys(): print(key) loggers[key].print_statistics(run) - with open(log_file, 'a') as f: + with open(log_file, "a") as f: print(key, file=f) loggers[key].print_statistics(run, f=f) for key in loggers.keys(): print(key) loggers[key].print_statistics() - with open(log_file, 'a') as f: + with open(log_file, "a") as f: print(key, file=f) loggers[key].print_statistics(f=f) - print(f'Total number of parameters is {total_params}') - print(f'Results are saved in {args.res_dir}') + print(f"Total number of parameters is {total_params}") + print(f"Results are saved in {args.res_dir}") diff --git a/examples/pytorch/ogb/sign/dataset.py b/examples/pytorch/ogb/sign/dataset.py index d5daf2e9a19b..b3d3de824e7f 100644 --- a/examples/pytorch/ogb/sign/dataset.py +++ b/examples/pytorch/ogb/sign/dataset.py @@ -1,8 +1,9 @@ -import torch import numpy as np +import torch +from ogb.nodeproppred import DglNodePropPredDataset, Evaluator + import dgl import dgl.function as fn -from ogb.nodeproppred import DglNodePropPredDataset, Evaluator def get_ogb_evaluator(dataset): @@ -10,10 +11,12 @@ def get_ogb_evaluator(dataset): Get evaluator from Open Graph Benchmark based on dataset """ evaluator = Evaluator(name=dataset) - return lambda preds, labels: evaluator.eval({ - "y_true": labels.view(-1, 1), - "y_pred": preds.view(-1, 1), - })["acc"] + return lambda preds, labels: evaluator.eval( + { + "y_true": labels.view(-1, 1), + "y_pred": preds.view(-1, 1), + } + )["acc"] def convert_mag_to_homograph(g, device): @@ -25,11 +28,13 @@ def convert_mag_to_homograph(g, device): src_writes, dst_writes = g.all_edges(etype="writes") src_topic, dst_topic = g.all_edges(etype="has_topic") src_aff, dst_aff = g.all_edges(etype="affiliated_with") - new_g = dgl.heterograph({ - ("paper", "written", "author"): (dst_writes, src_writes), - ("paper", "has_topic", "field"): (src_topic, dst_topic), - ("author", "aff", "inst"): (src_aff, dst_aff) - }) + new_g = dgl.heterograph( + { + ("paper", "written", "author"): (dst_writes, src_writes), + ("paper", "has_topic", "field"): (src_topic, dst_topic), + ("author", "aff", "inst"): (src_aff, dst_aff), + } + ) new_g = new_g.to(device) new_g.nodes["paper"].data["feat"] = g.nodes["paper"].data["feat"] new_g["written"].update_all(fn.copy_u("feat", "m"), fn.mean("m", "feat")) @@ -65,7 +70,7 @@ def load_dataset(name, device): if name == "ogbn-arxiv": g = dgl.add_reverse_edges(g, copy_ndata=True) g = dgl.add_self_loop(g) - g.ndata['feat'] = g.ndata['feat'].float() + g.ndata["feat"] = g.ndata["feat"].float() elif name == "ogbn-mag": # MAG is a heterogeneous graph. 
The task is to make prediction for # paper nodes @@ -75,16 +80,18 @@ def load_dataset(name, device): test_nid = test_nid["paper"] g = convert_mag_to_homograph(g, device) else: - g.ndata['feat'] = g.ndata['feat'].float() + g.ndata["feat"] = g.ndata["feat"].float() n_classes = dataset.num_classes labels = labels.squeeze() evaluator = get_ogb_evaluator(name) - print(f"# Nodes: {g.number_of_nodes()}\n" - f"# Edges: {g.number_of_edges()}\n" - f"# Train: {len(train_nid)}\n" - f"# Val: {len(val_nid)}\n" - f"# Test: {len(test_nid)}\n" - f"# Classes: {n_classes}") + print( + f"# Nodes: {g.number_of_nodes()}\n" + f"# Edges: {g.number_of_edges()}\n" + f"# Train: {len(train_nid)}\n" + f"# Val: {len(val_nid)}\n" + f"# Test: {len(test_nid)}\n" + f"# Classes: {n_classes}" + ) return g, labels, n_classes, train_nid, val_nid, test_nid, evaluator diff --git a/examples/pytorch/ogb/sign/sign.py b/examples/pytorch/ogb/sign/sign.py index e7afd9e5332b..274ed361909e 100644 --- a/examples/pytorch/ogb/sign/sign.py +++ b/examples/pytorch/ogb/sign/sign.py @@ -1,11 +1,13 @@ import argparse import time + import numpy as np import torch import torch.nn as nn +from dataset import load_dataset + import dgl import dgl.function as fn -from dataset import load_dataset class FeedForwardNet(nn.Module): @@ -40,8 +42,16 @@ def forward(self, x): class SIGN(nn.Module): - def __init__(self, in_feats, hidden, out_feats, num_hops, n_layers, - dropout, input_drop): + def __init__( + self, + in_feats, + hidden, + out_feats, + num_hops, + n_layers, + dropout, + input_drop, + ): super(SIGN, self).__init__() self.dropout = nn.Dropout(dropout) self.prelu = nn.PReLU() @@ -49,9 +59,11 @@ def __init__(self, in_feats, hidden, out_feats, num_hops, n_layers, self.input_drop = nn.Dropout(input_drop) for hop in range(num_hops): self.inception_ffs.append( - FeedForwardNet(in_feats, hidden, hidden, n_layers, dropout)) - self.project = FeedForwardNet(num_hops * hidden, hidden, out_feats, - n_layers, dropout) + FeedForwardNet(in_feats, hidden, hidden, n_layers, dropout) + ) + self.project = FeedForwardNet( + num_hops * hidden, hidden, out_feats, n_layers, dropout + ) def forward(self, feats): feats = [self.input_drop(feat) for feat in feats] @@ -72,7 +84,7 @@ def get_n_params(model): for p in list(model.parameters()): nn = 1 for s in list(p.size()): - nn = nn*s + nn = nn * s pp += nn return pp @@ -84,8 +96,9 @@ def neighbor_average_features(g, args): print("Compute neighbor-averaged feats") g.ndata["feat_0"] = g.ndata["feat"] for hop in range(1, args.R + 1): - g.update_all(fn.copy_u(f"feat_{hop-1}", "msg"), - fn.mean("msg", f"feat_{hop}")) + g.update_all( + fn.copy_u(f"feat_{hop-1}", "msg"), fn.mean("msg", f"feat_{hop}") + ) res = [] for hop in range(args.R + 1): res.append(g.ndata.pop(f"feat_{hop}")) @@ -98,8 +111,9 @@ def neighbor_average_features(g, args): num_target = target_mask.sum().item() new_res = [] for x in res: - feat = torch.zeros((num_target,) + x.shape[1:], - dtype=x.dtype, device=x.device) + feat = torch.zeros( + (num_target,) + x.shape[1:], dtype=x.dtype, device=x.device + ) feat[target_ids] = x[target_mask] new_res.append(feat) res = new_res @@ -112,15 +126,23 @@ def prepare_data(device, args): """ data = load_dataset(args.dataset, device) g, labels, n_classes, train_nid, val_nid, test_nid, evaluator = data - in_feats = g.ndata['feat'].shape[1] + in_feats = g.ndata["feat"].shape[1] feats = neighbor_average_features(g, args) labels = labels.to(device) # move to device train_nid = train_nid.to(device) val_nid = val_nid.to(device) 
test_nid = test_nid.to(device) - return feats, labels, in_feats, n_classes, \ - train_nid, val_nid, test_nid, evaluator + return ( + feats, + labels, + in_feats, + n_classes, + train_nid, + val_nid, + test_nid, + evaluator, + ) def train(model, feats, labels, loss_fcn, optimizer, train_loader): @@ -134,8 +156,9 @@ def train(model, feats, labels, loss_fcn, optimizer, train_loader): optimizer.step() -def test(model, feats, labels, test_loader, evaluator, - train_nid, val_nid, test_nid): +def test( + model, feats, labels, test_loader, evaluator, train_nid, val_nid, test_nid +): model.eval() device = labels.device preds = [] @@ -151,24 +174,44 @@ def test(model, feats, labels, test_loader, evaluator, def run(args, data, device): - feats, labels, in_size, num_classes, \ - train_nid, val_nid, test_nid, evaluator = data + ( + feats, + labels, + in_size, + num_classes, + train_nid, + val_nid, + test_nid, + evaluator, + ) = data train_loader = torch.utils.data.DataLoader( - train_nid, batch_size=args.batch_size, shuffle=True, drop_last=False) + train_nid, batch_size=args.batch_size, shuffle=True, drop_last=False + ) test_loader = torch.utils.data.DataLoader( - torch.arange(labels.shape[0]), batch_size=args.eval_batch_size, - shuffle=False, drop_last=False) + torch.arange(labels.shape[0]), + batch_size=args.eval_batch_size, + shuffle=False, + drop_last=False, + ) # Initialize model and optimizer for each run num_hops = args.R + 1 - model = SIGN(in_size, args.num_hidden, num_classes, num_hops, - args.ff_layer, args.dropout, args.input_dropout) + model = SIGN( + in_size, + args.num_hidden, + num_classes, + num_hops, + args.ff_layer, + args.dropout, + args.input_dropout, + ) model = model.to(device) print("# Params:", get_n_params(model)) loss_fcn = nn.CrossEntropyLoss() - optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, - weight_decay=args.weight_decay) + optimizer = torch.optim.Adam( + model.parameters(), lr=args.lr, weight_decay=args.weight_decay + ) # Start training best_epoch = 0 @@ -180,8 +223,16 @@ def run(args, data, device): if epoch % args.eval_every == 0: with torch.no_grad(): - acc = test(model, feats, labels, test_loader, evaluator, - train_nid, val_nid, test_nid) + acc = test( + model, + feats, + labels, + test_loader, + evaluator, + train_nid, + val_nid, + test_nid, + ) end = time.time() log = "Epoch {}, Time(s): {:.4f}, ".format(epoch, end - start) log += "Acc: Train {:.4f}, Val {:.4f}, Test {:.4f}".format(*acc) @@ -191,8 +242,11 @@ def run(args, data, device): best_val = acc[1] best_test = acc[2] - print("Best Epoch {}, Val {:.4f}, Test {:.4f}".format( - best_epoch, best_val, best_test)) + print( + "Best Epoch {}, Val {:.4f}, Test {:.4f}".format( + best_epoch, best_val, best_test + ) + ) return best_val, best_test @@ -212,34 +266,51 @@ def main(args): val_accs.append(best_val) test_accs.append(best_test) - print(f"Average val accuracy: {np.mean(val_accs):.4f}, " - f"std: {np.std(val_accs):.4f}") - print(f"Average test accuracy: {np.mean(test_accs):.4f}, " - f"std: {np.std(test_accs):.4f}") + print( + f"Average val accuracy: {np.mean(val_accs):.4f}, " + f"std: {np.std(val_accs):.4f}" + ) + print( + f"Average test accuracy: {np.mean(test_accs):.4f}, " + f"std: {np.std(test_accs):.4f}" + ) if __name__ == "__main__": parser = argparse.ArgumentParser(description="SIGN") parser.add_argument("--num-epochs", type=int, default=1000) parser.add_argument("--num-hidden", type=int, default=512) - parser.add_argument("--R", type=int, default=5, - help="number of hops") + 
parser.add_argument("--R", type=int, default=5, help="number of hops") parser.add_argument("--lr", type=float, default=0.001) parser.add_argument("--dataset", type=str, default="ogbn-mag") - parser.add_argument("--dropout", type=float, default=0.5, - help="dropout on activation") + parser.add_argument( + "--dropout", type=float, default=0.5, help="dropout on activation" + ) parser.add_argument("--gpu", type=int, default=0) parser.add_argument("--weight-decay", type=float, default=0) parser.add_argument("--eval-every", type=int, default=10) parser.add_argument("--batch-size", type=int, default=50000) - parser.add_argument("--eval-batch-size", type=int, default=100000, - help="evaluation batch size") - parser.add_argument("--ff-layer", type=int, default=2, - help="number of feed-forward layers") - parser.add_argument("--input-dropout", type=float, default=0, - help="dropout on input features") - parser.add_argument("--num-runs", type=int, default=10, - help="number of times to repeat the experiment") + parser.add_argument( + "--eval-batch-size", + type=int, + default=100000, + help="evaluation batch size", + ) + parser.add_argument( + "--ff-layer", type=int, default=2, help="number of feed-forward layers" + ) + parser.add_argument( + "--input-dropout", + type=float, + default=0, + help="dropout on input features", + ) + parser.add_argument( + "--num-runs", + type=int, + default=10, + help="number of times to repeat the experiment", + ) args = parser.parse_args() print(args) diff --git a/examples/pytorch/ogb_lsc/MAG240M/preprocess.py b/examples/pytorch/ogb_lsc/MAG240M/preprocess.py index 3c69919f6bc0..ed0a9487b744 100644 --- a/examples/pytorch/ogb_lsc/MAG240M/preprocess.py +++ b/examples/pytorch/ogb_lsc/MAG240M/preprocess.py @@ -1,67 +1,122 @@ -import ogb -from ogb.lsc import MAG240MDataset -import tqdm +import argparse +import os + import numpy as np +import ogb import torch +import tqdm +from ogb.lsc import MAG240MDataset + import dgl import dgl.function as fn -import argparse -import os parser = argparse.ArgumentParser() -parser.add_argument('--rootdir', type=str, default='.', help='Directory to download the OGB dataset.') -parser.add_argument('--author-output-path', type=str, help='Path to store the author features.') -parser.add_argument('--inst-output-path', type=str, - help='Path to store the institution features.') -parser.add_argument('--graph-output-path', type=str, help='Path to store the graph.') -parser.add_argument('--graph-format', type=str, default='csc', help='Graph format (coo, csr or csc).') -parser.add_argument('--graph-as-homogeneous', action='store_true', help='Store the graph as DGL homogeneous graph.') -parser.add_argument('--full-output-path', type=str, - help='Path to store features of all nodes. Effective only when graph is homogeneous.') +parser.add_argument( + "--rootdir", + type=str, + default=".", + help="Directory to download the OGB dataset.", +) +parser.add_argument( + "--author-output-path", type=str, help="Path to store the author features." +) +parser.add_argument( + "--inst-output-path", + type=str, + help="Path to store the institution features.", +) +parser.add_argument( + "--graph-output-path", type=str, help="Path to store the graph." 
+) +parser.add_argument( + "--graph-format", + type=str, + default="csc", + help="Graph format (coo, csr or csc).", +) +parser.add_argument( + "--graph-as-homogeneous", + action="store_true", + help="Store the graph as DGL homogeneous graph.", +) +parser.add_argument( + "--full-output-path", + type=str, + help="Path to store features of all nodes. Effective only when graph is homogeneous.", +) args = parser.parse_args() -print('Building graph') +print("Building graph") dataset = MAG240MDataset(root=args.rootdir) -ei_writes = dataset.edge_index('author', 'writes', 'paper') -ei_cites = dataset.edge_index('paper', 'paper') -ei_affiliated = dataset.edge_index('author', 'institution') +ei_writes = dataset.edge_index("author", "writes", "paper") +ei_cites = dataset.edge_index("paper", "paper") +ei_affiliated = dataset.edge_index("author", "institution") # We sort the nodes starting with the papers, then the authors, then the institutions. author_offset = 0 inst_offset = author_offset + dataset.num_authors paper_offset = inst_offset + dataset.num_institutions -g = dgl.heterograph({ - ('author', 'write', 'paper'): (ei_writes[0], ei_writes[1]), - ('paper', 'write-by', 'author'): (ei_writes[1], ei_writes[0]), - ('author', 'affiliate-with', 'institution'): (ei_affiliated[0], ei_affiliated[1]), - ('institution', 'affiliate', 'author'): (ei_affiliated[1], ei_affiliated[0]), - ('paper', 'cite', 'paper'): (np.concatenate([ei_cites[0], ei_cites[1]]), np.concatenate([ei_cites[1], ei_cites[0]])) - }) +g = dgl.heterograph( + { + ("author", "write", "paper"): (ei_writes[0], ei_writes[1]), + ("paper", "write-by", "author"): (ei_writes[1], ei_writes[0]), + ("author", "affiliate-with", "institution"): ( + ei_affiliated[0], + ei_affiliated[1], + ), + ("institution", "affiliate", "author"): ( + ei_affiliated[1], + ei_affiliated[0], + ), + ("paper", "cite", "paper"): ( + np.concatenate([ei_cites[0], ei_cites[1]]), + np.concatenate([ei_cites[1], ei_cites[0]]), + ), + } +) paper_feat = dataset.paper_feat -author_feat = np.memmap(args.author_output_path, mode='w+', dtype='float16', shape=(dataset.num_authors, dataset.num_paper_features)) -inst_feat = np.memmap(args.inst_output_path, mode='w+', dtype='float16', shape=(dataset.num_institutions, dataset.num_paper_features)) +author_feat = np.memmap( + args.author_output_path, + mode="w+", + dtype="float16", + shape=(dataset.num_authors, dataset.num_paper_features), +) +inst_feat = np.memmap( + args.inst_output_path, + mode="w+", + dtype="float16", + shape=(dataset.num_institutions, dataset.num_paper_features), +) # Iteratively process author features along the feature dimension. BLOCK_COLS = 16 with tqdm.trange(0, dataset.num_paper_features, BLOCK_COLS) as tq: for start in tq: - tq.set_postfix_str('Reading paper features...') - g.nodes['paper'].data['x'] = torch.FloatTensor(paper_feat[:, start:start + BLOCK_COLS].astype('float32')) + tq.set_postfix_str("Reading paper features...") + g.nodes["paper"].data["x"] = torch.FloatTensor( + paper_feat[:, start : start + BLOCK_COLS].astype("float32") + ) # Compute author features... - tq.set_postfix_str('Computing author features...') - g.update_all(fn.copy_u('x', 'm'), fn.mean('m', 'x'), etype='write-by') + tq.set_postfix_str("Computing author features...") + g.update_all(fn.copy_u("x", "m"), fn.mean("m", "x"), etype="write-by") # Then institution features... 
- tq.set_postfix_str('Computing institution features...') - g.update_all(fn.copy_u('x', 'm'), fn.mean('m', 'x'), etype='affiliate-with') - tq.set_postfix_str('Writing author features...') - author_feat[:, start:start + BLOCK_COLS] = g.nodes['author'].data['x'].numpy().astype('float16') - tq.set_postfix_str('Writing institution features...') - inst_feat[:, start:start + BLOCK_COLS] = g.nodes['institution'].data['x'].numpy().astype('float16') - del g.nodes['paper'].data['x'] - del g.nodes['author'].data['x'] - del g.nodes['institution'].data['x'] + tq.set_postfix_str("Computing institution features...") + g.update_all( + fn.copy_u("x", "m"), fn.mean("m", "x"), etype="affiliate-with" + ) + tq.set_postfix_str("Writing author features...") + author_feat[:, start : start + BLOCK_COLS] = ( + g.nodes["author"].data["x"].numpy().astype("float16") + ) + tq.set_postfix_str("Writing institution features...") + inst_feat[:, start : start + BLOCK_COLS] = ( + g.nodes["institution"].data["x"].numpy().astype("float16") + ) + del g.nodes["paper"].data["x"] + del g.nodes["author"].data["x"] + del g.nodes["institution"].data["x"] author_feat.flush() inst_feat.flush() @@ -73,34 +128,56 @@ # DGL also ensures that the node types are sorted in ascending order. assert torch.equal( g.ndata[dgl.NTYPE], - torch.cat([torch.full((dataset.num_authors,), 0), - torch.full((dataset.num_institutions,), 1), - torch.full((dataset.num_papers,), 2)])) + torch.cat( + [ + torch.full((dataset.num_authors,), 0), + torch.full((dataset.num_institutions,), 1), + torch.full((dataset.num_papers,), 2), + ] + ), + ) assert torch.equal( g.ndata[dgl.NID], - torch.cat([torch.arange(dataset.num_authors), - torch.arange(dataset.num_institutions), - torch.arange(dataset.num_papers)])) - g.edata['etype'] = g.edata[dgl.ETYPE].byte() + torch.cat( + [ + torch.arange(dataset.num_authors), + torch.arange(dataset.num_institutions), + torch.arange(dataset.num_papers), + ] + ), + ) + g.edata["etype"] = g.edata[dgl.ETYPE].byte() del g.edata[dgl.ETYPE] del g.ndata[dgl.NTYPE] del g.ndata[dgl.NID] # Process feature full_feat = np.memmap( - args.full_output_path, mode='w+', dtype='float16', - shape=(dataset.num_authors + dataset.num_institutions + dataset.num_papers, dataset.num_paper_features)) + args.full_output_path, + mode="w+", + dtype="float16", + shape=( + dataset.num_authors + dataset.num_institutions + dataset.num_papers, + dataset.num_paper_features, + ), + ) BLOCK_ROWS = 100000 for start in tqdm.trange(0, dataset.num_authors, BLOCK_ROWS): end = min(dataset.num_authors, start + BLOCK_ROWS) - full_feat[author_offset + start:author_offset + end] = author_feat[start:end] + full_feat[author_offset + start : author_offset + end] = author_feat[ + start:end + ] for start in tqdm.trange(0, dataset.num_institutions, BLOCK_ROWS): end = min(dataset.num_institutions, start + BLOCK_ROWS) - full_feat[inst_offset + start:inst_offset + end] = inst_feat[start:end] + full_feat[inst_offset + start : inst_offset + end] = inst_feat[ + start:end + ] for start in tqdm.trange(0, dataset.num_papers, BLOCK_ROWS): end = min(dataset.num_papers, start + BLOCK_ROWS) - full_feat[paper_offset + start:paper_offset + end] = paper_feat[start:end] - + full_feat[paper_offset + start : paper_offset + end] = paper_feat[ + start:end + ] + # Convert the graph to the given format and save. 
(The RGAT baseline needs CSC graph) g = g.formats(args.graph_format) dgl.save_graphs(args.graph_output_path, g) diff --git a/examples/pytorch/ogb_lsc/MAG240M/train.py b/examples/pytorch/ogb_lsc/MAG240M/train.py index 9f6c40460afa..42e208dd51f9 100644 --- a/examples/pytorch/ogb_lsc/MAG240M/train.py +++ b/examples/pytorch/ogb_lsc/MAG240M/train.py @@ -1,65 +1,99 @@ #!/usr/bin/env python # coding: utf-8 +import argparse +import time + +import numpy as np import ogb -from ogb.lsc import MAG240MDataset, MAG240MEvaluator -import dgl import torch -import numpy as np -import time +import torch.nn as nn +import torch.nn.functional as F import tqdm +from ogb.lsc import MAG240MDataset, MAG240MEvaluator + +import dgl import dgl.function as fn -import numpy as np import dgl.nn as dglnn -import torch.nn as nn -import torch.nn.functional as F -import argparse + class RGAT(nn.Module): - def __init__(self, in_channels, out_channels, hidden_channels, num_etypes, num_layers, num_heads, dropout, pred_ntype): + def __init__( + self, + in_channels, + out_channels, + hidden_channels, + num_etypes, + num_layers, + num_heads, + dropout, + pred_ntype, + ): super().__init__() self.convs = nn.ModuleList() self.norms = nn.ModuleList() self.skips = nn.ModuleList() - - self.convs.append(nn.ModuleList([ - dglnn.GATConv(in_channels, hidden_channels // num_heads, num_heads, allow_zero_in_degree=True) - for _ in range(num_etypes) - ])) + + self.convs.append( + nn.ModuleList( + [ + dglnn.GATConv( + in_channels, + hidden_channels // num_heads, + num_heads, + allow_zero_in_degree=True, + ) + for _ in range(num_etypes) + ] + ) + ) self.norms.append(nn.BatchNorm1d(hidden_channels)) self.skips.append(nn.Linear(in_channels, hidden_channels)) for _ in range(num_layers - 1): - self.convs.append(nn.ModuleList([ - dglnn.GATConv(hidden_channels, hidden_channels // num_heads, num_heads, allow_zero_in_degree=True) - for _ in range(num_etypes) - ])) + self.convs.append( + nn.ModuleList( + [ + dglnn.GATConv( + hidden_channels, + hidden_channels // num_heads, + num_heads, + allow_zero_in_degree=True, + ) + for _ in range(num_etypes) + ] + ) + ) self.norms.append(nn.BatchNorm1d(hidden_channels)) self.skips.append(nn.Linear(hidden_channels, hidden_channels)) - + self.mlp = nn.Sequential( nn.Linear(hidden_channels, hidden_channels), nn.BatchNorm1d(hidden_channels), nn.ReLU(), nn.Dropout(dropout), - nn.Linear(hidden_channels, out_channels) + nn.Linear(hidden_channels, out_channels), ) self.dropout = nn.Dropout(dropout) - + self.hidden_channels = hidden_channels self.pred_ntype = pred_ntype self.num_etypes = num_etypes - + def forward(self, mfgs, x): for i in range(len(mfgs)): mfg = mfgs[i] - x_dst = x[:mfg.num_dst_nodes()] + x_dst = x[: mfg.num_dst_nodes()] n_src = mfg.num_src_nodes() n_dst = mfg.num_dst_nodes() mfg = dgl.block_to_graph(mfg) x_skip = self.skips[i](x_dst) for j in range(self.num_etypes): - subg = mfg.edge_subgraph(mfg.edata['etype'] == j, relabel_nodes=False) - x_skip += self.convs[i][j](subg, (x, x_dst)).view(-1, self.hidden_channels) + subg = mfg.edge_subgraph( + mfg.edata["etype"] == j, relabel_nodes=False + ) + x_skip += self.convs[i][j](subg, (x, x_dst)).view( + -1, self.hidden_channels + ) x = self.norms[i](x_skip) x = F.elu(x) x = self.dropout(x) @@ -76,27 +110,34 @@ def __init__(self, g, idx, sampler, offset, feats, label): def collate(self, items): input_nodes, output_nodes, mfgs = super().collate(items) # Copy input features - mfgs[0].srcdata['x'] = torch.FloatTensor(self.feats[input_nodes]) - mfgs[-1].dstdata['y'] = 
torch.LongTensor(self.label[output_nodes - self.offset]) + mfgs[0].srcdata["x"] = torch.FloatTensor(self.feats[input_nodes]) + mfgs[-1].dstdata["y"] = torch.LongTensor( + self.label[output_nodes - self.offset] + ) return input_nodes, output_nodes, mfgs + def train(args, dataset, g, feats, paper_offset): - print('Loading masks and labels') - train_idx = torch.LongTensor(dataset.get_idx_split('train')) + paper_offset - valid_idx = torch.LongTensor(dataset.get_idx_split('valid')) + paper_offset + print("Loading masks and labels") + train_idx = torch.LongTensor(dataset.get_idx_split("train")) + paper_offset + valid_idx = torch.LongTensor(dataset.get_idx_split("valid")) + paper_offset label = dataset.paper_label - print('Initializing dataloader...') + print("Initializing dataloader...") sampler = dgl.dataloading.MultiLayerNeighborSampler([15, 25]) - train_collator = ExternalNodeCollator(g, train_idx, sampler, paper_offset, feats, label) - valid_collator = ExternalNodeCollator(g, valid_idx, sampler, paper_offset, feats, label) + train_collator = ExternalNodeCollator( + g, train_idx, sampler, paper_offset, feats, label + ) + valid_collator = ExternalNodeCollator( + g, valid_idx, sampler, paper_offset, feats, label + ) train_dataloader = torch.utils.data.DataLoader( train_collator.dataset, batch_size=1024, shuffle=True, drop_last=False, collate_fn=train_collator.collate, - num_workers=4 + num_workers=4, ) valid_dataloader = torch.utils.data.DataLoader( valid_collator.dataset, @@ -104,11 +145,20 @@ def train(args, dataset, g, feats, paper_offset): shuffle=True, drop_last=False, collate_fn=valid_collator.collate, - num_workers=2 + num_workers=2, ) - print('Initializing model...') - model = RGAT(dataset.num_paper_features, dataset.num_classes, 1024, 5, 2, 4, 0.5, 'paper').cuda() + print("Initializing model...") + model = RGAT( + dataset.num_paper_features, + dataset.num_classes, + 1024, + 5, + 2, + 4, + 0.5, + "paper", + ).cuda() opt = torch.optim.Adam(model.parameters(), lr=0.001) sched = torch.optim.lr_scheduler.StepLR(opt, step_size=25, gamma=0.25) @@ -118,114 +168,170 @@ def train(args, dataset, g, feats, paper_offset): model.train() with tqdm.tqdm(train_dataloader) as tq: for i, (input_nodes, output_nodes, mfgs) in enumerate(tq): - mfgs = [g.to('cuda') for g in mfgs] - x = mfgs[0].srcdata['x'] - y = mfgs[-1].dstdata['y'] + mfgs = [g.to("cuda") for g in mfgs] + x = mfgs[0].srcdata["x"] + y = mfgs[-1].dstdata["y"] y_hat = model(mfgs, x) loss = F.cross_entropy(y_hat, y) opt.zero_grad() loss.backward() opt.step() acc = (y_hat.argmax(1) == y).float().mean() - tq.set_postfix({'loss': '%.4f' % loss.item(), 'acc': '%.4f' % acc.item()}, refresh=False) + tq.set_postfix( + {"loss": "%.4f" % loss.item(), "acc": "%.4f" % acc.item()}, + refresh=False, + ) model.eval() correct = total = 0 - for i, (input_nodes, output_nodes, mfgs) in enumerate(tqdm.tqdm(valid_dataloader)): + for i, (input_nodes, output_nodes, mfgs) in enumerate( + tqdm.tqdm(valid_dataloader) + ): with torch.no_grad(): - mfgs = [g.to('cuda') for g in mfgs] - x = mfgs[0].srcdata['x'] - y = mfgs[-1].dstdata['y'] + mfgs = [g.to("cuda") for g in mfgs] + x = mfgs[0].srcdata["x"] + y = mfgs[-1].dstdata["y"] y_hat = model(mfgs, x) correct += (y_hat.argmax(1) == y).sum().item() total += y_hat.shape[0] acc = correct / total - print('Validation accuracy:', acc) + print("Validation accuracy:", acc) sched.step() if best_acc < acc: best_acc = acc - print('Updating best model...') + print("Updating best model...") torch.save(model.state_dict(), 
args.model_path) + def test(args, dataset, g, feats, paper_offset): - print('Loading masks and labels...') - valid_idx = torch.LongTensor(dataset.get_idx_split('valid')) + paper_offset - test_idx = torch.LongTensor(dataset.get_idx_split('test')) + paper_offset + print("Loading masks and labels...") + valid_idx = torch.LongTensor(dataset.get_idx_split("valid")) + paper_offset + test_idx = torch.LongTensor(dataset.get_idx_split("test")) + paper_offset label = dataset.paper_label - print('Initializing data loader...') + print("Initializing data loader...") sampler = dgl.dataloading.MultiLayerNeighborSampler([160, 160]) - valid_collator = ExternalNodeCollator(g, valid_idx, sampler, paper_offset, feats, label) + valid_collator = ExternalNodeCollator( + g, valid_idx, sampler, paper_offset, feats, label + ) valid_dataloader = torch.utils.data.DataLoader( valid_collator.dataset, batch_size=16, shuffle=False, drop_last=False, collate_fn=valid_collator.collate, - num_workers=2 + num_workers=2, + ) + test_collator = ExternalNodeCollator( + g, test_idx, sampler, paper_offset, feats, label ) - test_collator = ExternalNodeCollator(g, test_idx, sampler, paper_offset, feats, label) test_dataloader = torch.utils.data.DataLoader( test_collator.dataset, batch_size=16, shuffle=False, drop_last=False, collate_fn=test_collator.collate, - num_workers=4 + num_workers=4, ) - print('Loading model...') - model = RGAT(dataset.num_paper_features, dataset.num_classes, 1024, 5, 2, 4, 0.5, 'paper').cuda() + print("Loading model...") + model = RGAT( + dataset.num_paper_features, + dataset.num_classes, + 1024, + 5, + 2, + 4, + 0.5, + "paper", + ).cuda() model.load_state_dict(torch.load(args.model_path)) model.eval() correct = total = 0 - for i, (input_nodes, output_nodes, mfgs) in enumerate(tqdm.tqdm(valid_dataloader)): + for i, (input_nodes, output_nodes, mfgs) in enumerate( + tqdm.tqdm(valid_dataloader) + ): with torch.no_grad(): - mfgs = [g.to('cuda') for g in mfgs] - x = mfgs[0].srcdata['x'] - y = mfgs[-1].dstdata['y'] + mfgs = [g.to("cuda") for g in mfgs] + x = mfgs[0].srcdata["x"] + y = mfgs[-1].dstdata["y"] y_hat = model(mfgs, x) correct += (y_hat.argmax(1) == y).sum().item() total += y_hat.shape[0] acc = correct / total - print('Validation accuracy:', acc) + print("Validation accuracy:", acc) evaluator = MAG240MEvaluator() y_preds = [] - for i, (input_nodes, output_nodes, mfgs) in enumerate(tqdm.tqdm(test_dataloader)): + for i, (input_nodes, output_nodes, mfgs) in enumerate( + tqdm.tqdm(test_dataloader) + ): with torch.no_grad(): - mfgs = [g.to('cuda') for g in mfgs] - x = mfgs[0].srcdata['x'] - y = mfgs[-1].dstdata['y'] + mfgs = [g.to("cuda") for g in mfgs] + x = mfgs[0].srcdata["x"] + y = mfgs[-1].dstdata["y"] y_hat = model(mfgs, x) y_preds.append(y_hat.argmax(1).cpu()) - evaluator.save_test_submission({'y_pred': torch.cat(y_preds)}, args.submission_path) + evaluator.save_test_submission( + {"y_pred": torch.cat(y_preds)}, args.submission_path + ) -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument('--rootdir', type=str, default='.', help='Directory to download the OGB dataset.') - parser.add_argument('--graph-path', type=str, default='./graph.dgl', help='Path to the graph.') - parser.add_argument('--full-feature-path', type=str, default='./full.npy', - help='Path to the features of all nodes.') - parser.add_argument('--epochs', type=int, default=100, help='Number of epochs.') - parser.add_argument('--model-path', type=str, default='./model.pt', 
help='Path to store the best model.') - parser.add_argument('--submission-path', type=str, default='./results', help='Submission directory.') + parser.add_argument( + "--rootdir", + type=str, + default=".", + help="Directory to download the OGB dataset.", + ) + parser.add_argument( + "--graph-path", + type=str, + default="./graph.dgl", + help="Path to the graph.", + ) + parser.add_argument( + "--full-feature-path", + type=str, + default="./full.npy", + help="Path to the features of all nodes.", + ) + parser.add_argument( + "--epochs", type=int, default=100, help="Number of epochs." + ) + parser.add_argument( + "--model-path", + type=str, + default="./model.pt", + help="Path to store the best model.", + ) + parser.add_argument( + "--submission-path", + type=str, + default="./results", + help="Submission directory.", + ) args = parser.parse_args() dataset = MAG240MDataset(root=args.rootdir) - print('Loading graph') + print("Loading graph") (g,), _ = dgl.load_graphs(args.graph_path) - g = g.formats(['csc']) + g = g.formats(["csc"]) - print('Loading features') + print("Loading features") paper_offset = dataset.num_authors + dataset.num_institutions num_nodes = paper_offset + dataset.num_papers num_features = dataset.num_paper_features - feats = np.memmap(args.full_feature_path, mode='r', dtype='float16', shape=(num_nodes, num_features)) + feats = np.memmap( + args.full_feature_path, + mode="r", + dtype="float16", + shape=(num_nodes, num_features), + ) if args.epochs != 0: train(args, dataset, g, feats, paper_offset) diff --git a/examples/pytorch/ogb_lsc/MAG240M/train_multi_gpus.py b/examples/pytorch/ogb_lsc/MAG240M/train_multi_gpus.py index b9e599d4077b..3eebd648ec91 100644 --- a/examples/pytorch/ogb_lsc/MAG240M/train_multi_gpus.py +++ b/examples/pytorch/ogb_lsc/MAG240M/train_multi_gpus.py @@ -1,41 +1,69 @@ #!/usr/bin/env python # coding: utf-8 +import argparse import math +import sys +from collections import OrderedDict -from ogb.lsc import MAG240MDataset, MAG240MEvaluator -import dgl -import torch -import tqdm import numpy as np -import dgl.nn as dglnn +import torch +import torch.multiprocessing as mp import torch.nn as nn import torch.nn.functional as F -import argparse -import torch.multiprocessing as mp -import sys +import tqdm +from ogb.lsc import MAG240MDataset, MAG240MEvaluator from torch.nn.parallel import DistributedDataParallel -from collections import OrderedDict + +import dgl +import dgl.nn as dglnn class RGAT(nn.Module): - def __init__(self, in_channels, out_channels, hidden_channels, num_etypes, num_layers, num_heads, dropout, - pred_ntype): + def __init__( + self, + in_channels, + out_channels, + hidden_channels, + num_etypes, + num_layers, + num_heads, + dropout, + pred_ntype, + ): super().__init__() self.convs = nn.ModuleList() self.norms = nn.ModuleList() self.skips = nn.ModuleList() - self.convs.append(nn.ModuleList([ - dglnn.GATConv(in_channels, hidden_channels // num_heads, num_heads, allow_zero_in_degree=True) - for _ in range(num_etypes) - ])) + self.convs.append( + nn.ModuleList( + [ + dglnn.GATConv( + in_channels, + hidden_channels // num_heads, + num_heads, + allow_zero_in_degree=True, + ) + for _ in range(num_etypes) + ] + ) + ) self.norms.append(nn.BatchNorm1d(hidden_channels)) self.skips.append(nn.Linear(in_channels, hidden_channels)) for _ in range(num_layers - 1): - self.convs.append(nn.ModuleList([ - dglnn.GATConv(hidden_channels, hidden_channels // num_heads, num_heads, allow_zero_in_degree=True) - for _ in range(num_etypes) - ])) + self.convs.append( + 
nn.ModuleList( + [ + dglnn.GATConv( + hidden_channels, + hidden_channels // num_heads, + num_heads, + allow_zero_in_degree=True, + ) + for _ in range(num_etypes) + ] + ) + ) self.norms.append(nn.BatchNorm1d(hidden_channels)) self.skips.append(nn.Linear(hidden_channels, hidden_channels)) @@ -44,7 +72,7 @@ def __init__(self, in_channels, out_channels, hidden_channels, num_etypes, num_l nn.BatchNorm1d(hidden_channels), nn.ReLU(), nn.Dropout(dropout), - nn.Linear(hidden_channels, out_channels) + nn.Linear(hidden_channels, out_channels), ) self.dropout = nn.Dropout(dropout) @@ -55,14 +83,18 @@ def __init__(self, in_channels, out_channels, hidden_channels, num_etypes, num_l def forward(self, mfgs, x): for i in range(len(mfgs)): mfg = mfgs[i] - x_dst = x[:mfg.num_dst_nodes()] + x_dst = x[: mfg.num_dst_nodes()] n_src = mfg.num_src_nodes() n_dst = mfg.num_dst_nodes() mfg = dgl.block_to_graph(mfg) x_skip = self.skips[i](x_dst) for j in range(self.num_etypes): - subg = mfg.edge_subgraph(mfg.edata['etype'] == j, relabel_nodes=False) - x_skip += self.convs[i][j](subg, (x, x_dst)).view(-1, self.hidden_channels) + subg = mfg.edge_subgraph( + mfg.edata["etype"] == j, relabel_nodes=False + ) + x_skip += self.convs[i][j](subg, (x, x_dst)).view( + -1, self.hidden_channels + ) x = self.norms[i](x_skip) x = F.elu(x) x = self.dropout(x) @@ -79,46 +111,65 @@ def __init__(self, g, idx, sampler, offset, feats, label): def collate(self, items): input_nodes, output_nodes, mfgs = super().collate(items) # Copy input features - mfgs[0].srcdata['x'] = torch.FloatTensor(self.feats[input_nodes]) - mfgs[-1].dstdata['y'] = torch.LongTensor(self.label[output_nodes - self.offset]) + mfgs[0].srcdata["x"] = torch.FloatTensor(self.feats[input_nodes]) + mfgs[-1].dstdata["y"] = torch.LongTensor( + self.label[output_nodes - self.offset] + ) return input_nodes, output_nodes, mfgs def train(proc_id, n_gpus, args, dataset, g, feats, paper_offset): dev_id = devices[proc_id] if n_gpus > 1: - dist_init_method = 'tcp://{master_ip}:{master_port}'.format( - master_ip='127.0.0.1', master_port='12346') + dist_init_method = "tcp://{master_ip}:{master_port}".format( + master_ip="127.0.0.1", master_port="12346" + ) world_size = n_gpus - torch.distributed.init_process_group(backend='nccl', - init_method=dist_init_method, - world_size=world_size, - rank=proc_id) + torch.distributed.init_process_group( + backend="nccl", + init_method=dist_init_method, + world_size=world_size, + rank=proc_id, + ) torch.cuda.set_device(dev_id) - print('Loading masks and labels') - train_idx = torch.LongTensor(dataset.get_idx_split('train')) + paper_offset - valid_idx = torch.LongTensor(dataset.get_idx_split('valid')) + paper_offset + print("Loading masks and labels") + train_idx = torch.LongTensor(dataset.get_idx_split("train")) + paper_offset + valid_idx = torch.LongTensor(dataset.get_idx_split("valid")) + paper_offset label = dataset.paper_label - print('Initializing dataloader...') + print("Initializing dataloader...") sampler = dgl.dataloading.MultiLayerNeighborSampler([15, 25]) - train_collator = ExternalNodeCollator(g, train_idx, sampler, paper_offset, feats, label) - valid_collator = ExternalNodeCollator(g, valid_idx, sampler, paper_offset, feats, label) + train_collator = ExternalNodeCollator( + g, train_idx, sampler, paper_offset, feats, label + ) + valid_collator = ExternalNodeCollator( + g, valid_idx, sampler, paper_offset, feats, label + ) # Necessary according to https://yangkky.github.io/2019/07/08/distributed-pytorch-tutorial.html train_sampler = 
torch.utils.data.distributed.DistributedSampler( - train_collator.dataset, num_replicas=world_size, rank=proc_id, shuffle=True, drop_last=False) + train_collator.dataset, + num_replicas=world_size, + rank=proc_id, + shuffle=True, + drop_last=False, + ) valid_sampler = torch.utils.data.distributed.DistributedSampler( - valid_collator.dataset, num_replicas=world_size, rank=proc_id, shuffle=True, drop_last=False) + valid_collator.dataset, + num_replicas=world_size, + rank=proc_id, + shuffle=True, + drop_last=False, + ) train_dataloader = torch.utils.data.DataLoader( train_collator.dataset, batch_size=1024, collate_fn=train_collator.collate, num_workers=4, - sampler=train_sampler + sampler=train_sampler, ) valid_dataloader = torch.utils.data.DataLoader( @@ -126,16 +177,27 @@ def train(proc_id, n_gpus, args, dataset, g, feats, paper_offset): batch_size=1024, collate_fn=valid_collator.collate, num_workers=2, - sampler=valid_sampler + sampler=valid_sampler, ) - print('Initializing model...') - model = RGAT(dataset.num_paper_features, dataset.num_classes, 1024, 5, 2, 4, 0.5, 'paper').to(dev_id) + print("Initializing model...") + model = RGAT( + dataset.num_paper_features, + dataset.num_classes, + 1024, + 5, + 2, + 4, + 0.5, + "paper", + ).to(dev_id) # convert BN to SyncBatchNorm. see https://pytorch.org/docs/stable/generated/torch.nn.SyncBatchNorm.html model = nn.SyncBatchNorm.convert_sync_batchnorm(model) - model = DistributedDataParallel(model, device_ids=[dev_id], output_device=dev_id) + model = DistributedDataParallel( + model, device_ids=[dev_id], output_device=dev_id + ) opt = torch.optim.Adam(model.parameters(), lr=0.001) sched = torch.optim.lr_scheduler.StepLR(opt, step_size=25, gamma=0.25) @@ -149,75 +211,97 @@ def train(proc_id, n_gpus, args, dataset, g, feats, paper_offset): with tqdm.tqdm(train_dataloader) as tq: for i, (input_nodes, output_nodes, mfgs) in enumerate(tq): mfgs = [g.to(dev_id) for g in mfgs] - x = mfgs[0].srcdata['x'] - y = mfgs[-1].dstdata['y'] + x = mfgs[0].srcdata["x"] + y = mfgs[-1].dstdata["y"] y_hat = model(mfgs, x) loss = F.cross_entropy(y_hat, y) opt.zero_grad() loss.backward() opt.step() acc = (y_hat.argmax(1) == y).float().mean() - tq.set_postfix({'loss': '%.4f' % loss.item(), 'acc': '%.4f' % acc.item()}, refresh=False) + tq.set_postfix( + {"loss": "%.4f" % loss.item(), "acc": "%.4f" % acc.item()}, + refresh=False, + ) # eval in each process model.eval() correct = torch.LongTensor([0]).to(dev_id) total = torch.LongTensor([0]).to(dev_id) - for i, (input_nodes, output_nodes, mfgs) in enumerate(tqdm.tqdm(valid_dataloader)): + for i, (input_nodes, output_nodes, mfgs) in enumerate( + tqdm.tqdm(valid_dataloader) + ): with torch.no_grad(): mfgs = [g.to(dev_id) for g in mfgs] - x = mfgs[0].srcdata['x'] - y = mfgs[-1].dstdata['y'] + x = mfgs[0].srcdata["x"] + y = mfgs[-1].dstdata["y"] y_hat = model(mfgs, x) correct += (y_hat.argmax(1) == y).sum().item() total += y_hat.shape[0] # `reduce` data into process 0 - torch.distributed.reduce(correct, dst=0, op=torch.distributed.ReduceOp.SUM) - torch.distributed.reduce(total, dst=0, op=torch.distributed.ReduceOp.SUM) + torch.distributed.reduce( + correct, dst=0, op=torch.distributed.ReduceOp.SUM + ) + torch.distributed.reduce( + total, dst=0, op=torch.distributed.ReduceOp.SUM + ) acc = (correct / total).item() sched.step() # process 0 print accuracy and save model if proc_id == 0: - print('Validation accuracy:', acc) + print("Validation accuracy:", acc) if best_acc < acc: best_acc = acc - print('Updating best model...') + 
print("Updating best model...") torch.save(model.state_dict(), args.model_path) def test(args, dataset, g, feats, paper_offset): - print('Loading masks and labels...') - valid_idx = torch.LongTensor(dataset.get_idx_split('valid')) + paper_offset - test_idx = torch.LongTensor(dataset.get_idx_split('test')) + paper_offset + print("Loading masks and labels...") + valid_idx = torch.LongTensor(dataset.get_idx_split("valid")) + paper_offset + test_idx = torch.LongTensor(dataset.get_idx_split("test")) + paper_offset label = dataset.paper_label - print('Initializing data loader...') + print("Initializing data loader...") sampler = dgl.dataloading.MultiLayerNeighborSampler([160, 160]) - valid_collator = ExternalNodeCollator(g, valid_idx, sampler, paper_offset, feats, label) + valid_collator = ExternalNodeCollator( + g, valid_idx, sampler, paper_offset, feats, label + ) valid_dataloader = torch.utils.data.DataLoader( valid_collator.dataset, batch_size=16, shuffle=False, drop_last=False, collate_fn=valid_collator.collate, - num_workers=2 + num_workers=2, + ) + test_collator = ExternalNodeCollator( + g, test_idx, sampler, paper_offset, feats, label ) - test_collator = ExternalNodeCollator(g, test_idx, sampler, paper_offset, feats, label) test_dataloader = torch.utils.data.DataLoader( test_collator.dataset, batch_size=16, shuffle=False, drop_last=False, collate_fn=test_collator.collate, - num_workers=4 + num_workers=4, ) - print('Loading model...') - model = RGAT(dataset.num_paper_features, dataset.num_classes, 1024, 5, 2, 4, 0.5, 'paper').cuda() + print("Loading model...") + model = RGAT( + dataset.num_paper_features, + dataset.num_classes, + 1024, + 5, + 2, + 4, + 0.5, + "paper", + ).cuda() # load ddp's model parameters, we need to remove the name of 'module.' 
state_dict = torch.load(args.model_path) @@ -229,41 +313,73 @@ def test(args, dataset, g, feats, paper_offset): model.eval() correct = total = 0 - for i, (input_nodes, output_nodes, mfgs) in enumerate(tqdm.tqdm(valid_dataloader)): + for i, (input_nodes, output_nodes, mfgs) in enumerate( + tqdm.tqdm(valid_dataloader) + ): with torch.no_grad(): - mfgs = [g.to('cuda') for g in mfgs] - x = mfgs[0].srcdata['x'] - y = mfgs[-1].dstdata['y'] + mfgs = [g.to("cuda") for g in mfgs] + x = mfgs[0].srcdata["x"] + y = mfgs[-1].dstdata["y"] y_hat = model(mfgs, x) correct += (y_hat.argmax(1) == y).sum().item() total += y_hat.shape[0] acc = correct / total - print('Validation accuracy:', acc) + print("Validation accuracy:", acc) evaluator = MAG240MEvaluator() y_preds = [] - for i, (input_nodes, output_nodes, mfgs) in enumerate(tqdm.tqdm(test_dataloader)): + for i, (input_nodes, output_nodes, mfgs) in enumerate( + tqdm.tqdm(test_dataloader) + ): with torch.no_grad(): - mfgs = [g.to('cuda') for g in mfgs] - x = mfgs[0].srcdata['x'] - y = mfgs[-1].dstdata['y'] + mfgs = [g.to("cuda") for g in mfgs] + x = mfgs[0].srcdata["x"] + y = mfgs[-1].dstdata["y"] y_hat = model(mfgs, x) y_preds.append(y_hat.argmax(1).cpu()) - evaluator.save_test_submission({'y_pred': torch.cat(y_preds)}, args.submission_path) + evaluator.save_test_submission( + {"y_pred": torch.cat(y_preds)}, args.submission_path + ) -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument('--rootdir', type=str, default='.', help='Directory to download the OGB dataset.') - parser.add_argument('--graph-path', type=str, default='./graph.dgl', help='Path to the graph.') - parser.add_argument('--full-feature-path', type=str, default='./full.npy', - help='Path to the features of all nodes.') - parser.add_argument('--epochs', type=int, default=100, help='Number of epochs.') - parser.add_argument('--model-path', type=str, default='./model_ddp.pt', help='Path to store the best model.') - parser.add_argument('--submission-path', type=str, default='./results_ddp', help='Submission directory.') - parser.add_argument('--gpus', type=str, default='0,1,2') + parser.add_argument( + "--rootdir", + type=str, + default=".", + help="Directory to download the OGB dataset.", + ) + parser.add_argument( + "--graph-path", + type=str, + default="./graph.dgl", + help="Path to the graph.", + ) + parser.add_argument( + "--full-feature-path", + type=str, + default="./full.npy", + help="Path to the features of all nodes.", + ) + parser.add_argument( + "--epochs", type=int, default=100, help="Number of epochs." 
+ ) + parser.add_argument( + "--model-path", + type=str, + default="./model_ddp.pt", + help="Path to store the best model.", + ) + parser.add_argument( + "--submission-path", + type=str, + default="./results_ddp", + help="Submission directory.", + ) + parser.add_argument("--gpus", type=str, default="0,1,2") args = parser.parse_args() - devices = list(map(int, args.gpus.split(','))) + devices = list(map(int, args.gpus.split(","))) n_gpus = len(devices) if n_gpus <= 1: @@ -272,16 +388,25 @@ def test(args, dataset, g, feats, paper_offset): dataset = MAG240MDataset(root=args.rootdir) - print('Loading graph') + print("Loading graph") (g,), _ = dgl.load_graphs(args.graph_path) - g = g.formats(['csc']) + g = g.formats(["csc"]) - print('Loading features') + print("Loading features") paper_offset = dataset.num_authors + dataset.num_institutions num_nodes = paper_offset + dataset.num_papers num_features = dataset.num_paper_features - feats = np.memmap(args.full_feature_path, mode='r', dtype='float16', shape=(num_nodes, num_features)) + feats = np.memmap( + args.full_feature_path, + mode="r", + dtype="float16", + shape=(num_nodes, num_features), + ) - mp.spawn(train, args=(n_gpus, args, dataset, g, feats, paper_offset), nprocs=n_gpus) + mp.spawn( + train, + args=(n_gpus, args, dataset, g, feats, paper_offset), + nprocs=n_gpus, + ) test(args, dataset, g, feats, paper_offset) diff --git a/examples/pytorch/ogb_lsc/PCQM4M/conv.py b/examples/pytorch/ogb_lsc/PCQM4M/conv.py index d30235ceefd4..51cda195fd9e 100644 --- a/examples/pytorch/ogb_lsc/PCQM4M/conv.py +++ b/examples/pytorch/ogb_lsc/PCQM4M/conv.py @@ -1,52 +1,56 @@ -import dgl -import dgl.function as fn import torch import torch.nn as nn import torch.nn.functional as F +from ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder +import dgl +import dgl.function as fn from dgl.nn.pytorch import SumPooling -from ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder + ### GIN convolution along the graph structure class GINConv(nn.Module): def __init__(self, emb_dim): - ''' - emb_dim (int): node embedding dimensionality - ''' + """ + emb_dim (int): node embedding dimensionality + """ super(GINConv, self).__init__() - self.mlp = nn.Sequential(nn.Linear(emb_dim, emb_dim), - nn.BatchNorm1d(emb_dim), - nn.ReLU(), - nn.Linear(emb_dim, emb_dim)) + self.mlp = nn.Sequential( + nn.Linear(emb_dim, emb_dim), + nn.BatchNorm1d(emb_dim), + nn.ReLU(), + nn.Linear(emb_dim, emb_dim), + ) self.eps = nn.Parameter(torch.Tensor([0])) - self.bond_encoder = BondEncoder(emb_dim = emb_dim) + self.bond_encoder = BondEncoder(emb_dim=emb_dim) def forward(self, g, x, edge_attr): with g.local_scope(): edge_embedding = self.bond_encoder(edge_attr) - g.ndata['x'] = x - g.apply_edges(fn.copy_u('x', 'm')) - g.edata['m'] = F.relu(g.edata['m'] + edge_embedding) - g.update_all(fn.copy_e('m', 'm'), fn.sum('m', 'new_x')) - out = self.mlp((1 + self.eps) * x + g.ndata['new_x']) + g.ndata["x"] = x + g.apply_edges(fn.copy_u("x", "m")) + g.edata["m"] = F.relu(g.edata["m"] + edge_embedding) + g.update_all(fn.copy_e("m", "m"), fn.sum("m", "new_x")) + out = self.mlp((1 + self.eps) * x + g.ndata["new_x"]) return out + ### GCN convolution along the graph structure class GCNConv(nn.Module): def __init__(self, emb_dim): - ''' - emb_dim (int): node embedding dimensionality - ''' + """ + emb_dim (int): node embedding dimensionality + """ super(GCNConv, self).__init__() self.linear = nn.Linear(emb_dim, emb_dim) self.root_emb = nn.Embedding(1, emb_dim) - self.bond_encoder = BondEncoder(emb_dim 
= emb_dim) + self.bond_encoder = BondEncoder(emb_dim=emb_dim) def forward(self, g, x, edge_attr): with g.local_scope(): @@ -56,29 +60,43 @@ def forward(self, g, x, edge_attr): # Molecular graphs are undirected # g.out_degrees() is the same as g.in_degrees() degs = (g.out_degrees().float() + 1).to(x.device) - norm = torch.pow(degs, -0.5).unsqueeze(-1) # (N, 1) - g.ndata['norm'] = norm - g.apply_edges(fn.u_mul_v('norm', 'norm', 'norm')) - - g.ndata['x'] = x - g.apply_edges(fn.copy_u('x', 'm')) - g.edata['m'] = g.edata['norm'] * F.relu(g.edata['m'] + edge_embedding) - g.update_all(fn.copy_e('m', 'm'), fn.sum('m', 'new_x')) - out = g.ndata['new_x'] + F.relu(x + self.root_emb.weight) * 1. / degs.view(-1, 1) + norm = torch.pow(degs, -0.5).unsqueeze(-1) # (N, 1) + g.ndata["norm"] = norm + g.apply_edges(fn.u_mul_v("norm", "norm", "norm")) + + g.ndata["x"] = x + g.apply_edges(fn.copy_u("x", "m")) + g.edata["m"] = g.edata["norm"] * F.relu( + g.edata["m"] + edge_embedding + ) + g.update_all(fn.copy_e("m", "m"), fn.sum("m", "new_x")) + out = g.ndata["new_x"] + F.relu( + x + self.root_emb.weight + ) * 1.0 / degs.view(-1, 1) return out + ### GNN to generate node embedding class GNN_node(nn.Module): """ Output: node representations """ - def __init__(self, num_layers, emb_dim, drop_ratio = 0.5, JK = "last", residual = False, gnn_type = 'gin'): - ''' - num_layers (int): number of GNN message passing layers - emb_dim (int): node embedding dimensionality - ''' + + def __init__( + self, + num_layers, + emb_dim, + drop_ratio=0.5, + JK="last", + residual=False, + gnn_type="gin", + ): + """ + num_layers (int): number of GNN message passing layers + emb_dim (int): node embedding dimensionality + """ super(GNN_node, self).__init__() self.num_layers = num_layers @@ -97,12 +115,12 @@ def __init__(self, num_layers, emb_dim, drop_ratio = 0.5, JK = "last", residual self.batch_norms = nn.ModuleList() for layer in range(num_layers): - if gnn_type == 'gin': + if gnn_type == "gin": self.convs.append(GINConv(emb_dim)) - elif gnn_type == 'gcn': + elif gnn_type == "gcn": self.convs.append(GCNConv(emb_dim)) else: - ValueError('Undefined GNN type called {}'.format(gnn_type)) + ValueError("Undefined GNN type called {}".format(gnn_type)) self.batch_norms.append(nn.BatchNorm1d(emb_dim)) @@ -115,10 +133,12 @@ def forward(self, g, x, edge_attr): h = self.batch_norms[layer](h) if layer == self.num_layers - 1: - #remove relu for the last layer - h = F.dropout(h, self.drop_ratio, training = self.training) + # remove relu for the last layer + h = F.dropout(h, self.drop_ratio, training=self.training) else: - h = F.dropout(F.relu(h), self.drop_ratio, training = self.training) + h = F.dropout( + F.relu(h), self.drop_ratio, training=self.training + ) if self.residual: h += h_list[layer] @@ -142,11 +162,20 @@ class GNN_node_Virtualnode(nn.Module): Output: node representations """ - def __init__(self, num_layers, emb_dim, drop_ratio = 0.5, JK = "last", residual = False, gnn_type = 'gin'): - ''' - num_layers (int): number of GNN message passing layers - emb_dim (int): node embedding dimensionality - ''' + + def __init__( + self, + num_layers, + emb_dim, + drop_ratio=0.5, + JK="last", + residual=False, + gnn_type="gin", + ): + """ + num_layers (int): number of GNN message passing layers + emb_dim (int): node embedding dimensionality + """ super(GNN_node_Virtualnode, self).__init__() self.num_layers = num_layers @@ -173,31 +202,38 @@ def __init__(self, num_layers, emb_dim, drop_ratio = 0.5, JK = "last", residual self.mlp_virtualnode_list = 
nn.ModuleList() for layer in range(num_layers): - if gnn_type == 'gin': + if gnn_type == "gin": self.convs.append(GINConv(emb_dim)) - elif gnn_type == 'gcn': + elif gnn_type == "gcn": self.convs.append(GCNConv(emb_dim)) else: - ValueError('Undefined GNN type called {}'.format(gnn_type)) + ValueError("Undefined GNN type called {}".format(gnn_type)) self.batch_norms.append(nn.BatchNorm1d(emb_dim)) for layer in range(num_layers - 1): - self.mlp_virtualnode_list.append(nn.Sequential(nn.Linear(emb_dim, emb_dim), - nn.BatchNorm1d(emb_dim), - nn.ReLU(), - nn.Linear(emb_dim, emb_dim), - nn.BatchNorm1d(emb_dim), - nn.ReLU())) + self.mlp_virtualnode_list.append( + nn.Sequential( + nn.Linear(emb_dim, emb_dim), + nn.BatchNorm1d(emb_dim), + nn.ReLU(), + nn.Linear(emb_dim, emb_dim), + nn.BatchNorm1d(emb_dim), + nn.ReLU(), + ) + ) self.pool = SumPooling() def forward(self, g, x, edge_attr): ### virtual node embeddings for graphs virtualnode_embedding = self.virtualnode_embedding( - torch.zeros(g.batch_size).to(x.dtype).to(x.device)) + torch.zeros(g.batch_size).to(x.dtype).to(x.device) + ) h_list = [self.atom_encoder(x)] - batch_id = dgl.broadcast_nodes(g, torch.arange(g.batch_size).to(x.device)) + batch_id = dgl.broadcast_nodes( + g, torch.arange(g.batch_size).to(x.device) + ) for layer in range(self.num_layers): ### add message from virtual nodes to graph nodes h_list[layer] = h_list[layer] + virtualnode_embedding[batch_id] @@ -206,10 +242,12 @@ def forward(self, g, x, edge_attr): h = self.convs[layer](g, h_list[layer], edge_attr) h = self.batch_norms[layer](h) if layer == self.num_layers - 1: - #remove relu for the last layer - h = F.dropout(h, self.drop_ratio, training = self.training) + # remove relu for the last layer + h = F.dropout(h, self.drop_ratio, training=self.training) else: - h = F.dropout(F.relu(h), self.drop_ratio, training = self.training) + h = F.dropout( + F.relu(h), self.drop_ratio, training=self.training + ) if self.residual: h = h + h_list[layer] @@ -219,17 +257,26 @@ def forward(self, g, x, edge_attr): ### update the virtual nodes if layer < self.num_layers - 1: ### add message from graph nodes to virtual nodes - virtualnode_embedding_temp = self.pool(g, h_list[layer]) + virtualnode_embedding + virtualnode_embedding_temp = ( + self.pool(g, h_list[layer]) + virtualnode_embedding + ) ### transform virtual nodes using MLP virtualnode_embedding_temp = self.mlp_virtualnode_list[layer]( - virtualnode_embedding_temp) + virtualnode_embedding_temp + ) if self.residual: virtualnode_embedding = virtualnode_embedding + F.dropout( - virtualnode_embedding_temp, self.drop_ratio, training = self.training) + virtualnode_embedding_temp, + self.drop_ratio, + training=self.training, + ) else: virtualnode_embedding = F.dropout( - virtualnode_embedding_temp, self.drop_ratio, training = self.training) + virtualnode_embedding_temp, + self.drop_ratio, + training=self.training, + ) ### Different implementations of Jk-concat if self.JK == "last": diff --git a/examples/pytorch/ogb_lsc/PCQM4M/gnn.py b/examples/pytorch/ogb_lsc/PCQM4M/gnn.py index 3adb43eccd1b..52335819d57b 100644 --- a/examples/pytorch/ogb_lsc/PCQM4M/gnn.py +++ b/examples/pytorch/ogb_lsc/PCQM4M/gnn.py @@ -1,19 +1,33 @@ import torch import torch.nn as nn +from conv import GNN_node, GNN_node_Virtualnode -from dgl.nn.pytorch import SumPooling, AvgPooling, MaxPooling, GlobalAttentionPooling, Set2Set +from dgl.nn.pytorch import ( + AvgPooling, + GlobalAttentionPooling, + MaxPooling, + Set2Set, + SumPooling, +) -from conv import GNN_node, 
GNN_node_Virtualnode class GNN(nn.Module): - - def __init__(self, num_tasks = 1, num_layers = 5, emb_dim = 300, gnn_type = 'gin', - virtual_node = True, residual = False, drop_ratio = 0, JK = "last", - graph_pooling = "sum"): - ''' - num_tasks (int): number of labels to be predicted - virtual_node (bool): whether to add virtual node or not - ''' + def __init__( + self, + num_tasks=1, + num_layers=5, + emb_dim=300, + gnn_type="gin", + virtual_node=True, + residual=False, + drop_ratio=0, + JK="last", + graph_pooling="sum", + ): + """ + num_tasks (int): number of labels to be predicted + virtual_node (bool): whether to add virtual node or not + """ super(GNN, self).__init__() self.num_layers = num_layers @@ -28,14 +42,23 @@ def __init__(self, num_tasks = 1, num_layers = 5, emb_dim = 300, gnn_type = 'gin ### GNN to generate node embeddings if virtual_node: - self.gnn_node = GNN_node_Virtualnode(num_layers, emb_dim, JK = JK, - drop_ratio = drop_ratio, - residual = residual, - gnn_type = gnn_type) + self.gnn_node = GNN_node_Virtualnode( + num_layers, + emb_dim, + JK=JK, + drop_ratio=drop_ratio, + residual=residual, + gnn_type=gnn_type, + ) else: - self.gnn_node = GNN_node(num_layers, emb_dim, JK = JK, drop_ratio = drop_ratio, - residual = residual, gnn_type = gnn_type) - + self.gnn_node = GNN_node( + num_layers, + emb_dim, + JK=JK, + drop_ratio=drop_ratio, + residual=residual, + gnn_type=gnn_type, + ) ### Pooling function to generate whole-graph embeddings if self.graph_pooling == "sum": @@ -46,18 +69,21 @@ def __init__(self, num_tasks = 1, num_layers = 5, emb_dim = 300, gnn_type = 'gin self.pool = MaxPooling elif self.graph_pooling == "attention": self.pool = GlobalAttentionPooling( - gate_nn = nn.Sequential(nn.Linear(emb_dim, 2*emb_dim), - nn.BatchNorm1d(2*emb_dim), - nn.ReLU(), - nn.Linear(2*emb_dim, 1))) + gate_nn=nn.Sequential( + nn.Linear(emb_dim, 2 * emb_dim), + nn.BatchNorm1d(2 * emb_dim), + nn.ReLU(), + nn.Linear(2 * emb_dim, 1), + ) + ) elif self.graph_pooling == "set2set": - self.pool = Set2Set(emb_dim, n_iters = 2, n_layers = 2) + self.pool = Set2Set(emb_dim, n_iters=2, n_layers=2) else: raise ValueError("Invalid graph pooling type.") if graph_pooling == "set2set": - self.graph_pred_linear = nn.Linear(2*self.emb_dim, self.num_tasks) + self.graph_pred_linear = nn.Linear(2 * self.emb_dim, self.num_tasks) else: self.graph_pred_linear = nn.Linear(self.emb_dim, self.num_tasks) diff --git a/examples/pytorch/ogb_lsc/PCQM4M/main.py b/examples/pytorch/ogb_lsc/PCQM4M/main.py index 0fd5338abc8b..118af92a6116 100644 --- a/examples/pytorch/ogb_lsc/PCQM4M/main.py +++ b/examples/pytorch/ogb_lsc/PCQM4M/main.py @@ -1,17 +1,18 @@ import argparse -import dgl -import numpy as np import os import random + +import numpy as np import torch import torch.optim as optim +from gnn import GNN from ogb.lsc import DglPCQM4MDataset, PCQM4MEvaluator +from torch.optim.lr_scheduler import StepLR from torch.utils.data import DataLoader from torch.utils.tensorboard import SummaryWriter -from torch.optim.lr_scheduler import StepLR from tqdm import tqdm -from gnn import GNN +import dgl reg_criterion = torch.nn.L1Loss() @@ -30,11 +31,13 @@ def train(model, device, loader, optimizer): for step, (bg, labels) in enumerate(tqdm(loader, desc="Iteration")): bg = bg.to(device) - x = bg.ndata.pop('feat') - edge_attr = bg.edata.pop('feat') + x = bg.ndata.pop("feat") + edge_attr = bg.edata.pop("feat") labels = labels.to(device) - pred = model(bg, x, edge_attr).view(-1,) + pred = model(bg, x, edge_attr).view( + -1, + ) 
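        # The GNN head returns a (batch_size, num_tasks) tensor and num_tasks
        # defaults to 1 here, so the reshape above flattens predictions into a
        # 1-D tensor that lines up with `labels` for the elementwise L1 loss.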
optimizer.zero_grad() loss = reg_criterion(pred, labels) loss.backward() @@ -52,12 +55,14 @@ def eval(model, device, loader, evaluator): for step, (bg, labels) in enumerate(tqdm(loader, desc="Iteration")): bg = bg.to(device) - x = bg.ndata.pop('feat') - edge_attr = bg.edata.pop('feat') + x = bg.ndata.pop("feat") + edge_attr = bg.edata.pop("feat") labels = labels.to(device) with torch.no_grad(): - pred = model(bg, x, edge_attr).view(-1, ) + pred = model(bg, x, edge_attr).view( + -1, + ) y_true.append(labels.view(pred.shape).detach().cpu()) y_pred.append(pred.detach().cpu()) @@ -76,11 +81,13 @@ def test(model, device, loader): for step, (bg, _) in enumerate(tqdm(loader, desc="Iteration")): bg = bg.to(device) - x = bg.ndata.pop('feat') - edge_attr = bg.edata.pop('feat') + x = bg.ndata.pop("feat") + edge_attr = bg.edata.pop("feat") with torch.no_grad(): - pred = model(bg, x, edge_attr).view(-1, ) + pred = model(bg, x, edge_attr).view( + -1, + ) y_pred.append(pred.detach().cpu()) @@ -91,37 +98,88 @@ def test(model, device, loader): def main(): # Training settings - parser = argparse.ArgumentParser(description='GNN baselines on pcqm4m with DGL') - parser.add_argument('--seed', type=int, default=42, - help='random seed to use (default: 42)') - parser.add_argument('--device', type=int, default=0, - help='which gpu to use if any (default: 0)') - parser.add_argument('--gnn', type=str, default='gin-virtual', - help='GNN to use, which can be from ' - '[gin, gin-virtual, gcn, gcn-virtual] (default: gin-virtual)') - parser.add_argument('--graph_pooling', type=str, default='sum', - help='graph pooling strategy mean or sum (default: sum)') - parser.add_argument('--drop_ratio', type=float, default=0, - help='dropout ratio (default: 0)') - parser.add_argument('--num_layers', type=int, default=5, - help='number of GNN message passing layers (default: 5)') - parser.add_argument('--emb_dim', type=int, default=600, - help='dimensionality of hidden units in GNNs (default: 600)') - parser.add_argument('--train_subset', action='store_true', - help='use 10% of the training set for training') - parser.add_argument('--batch_size', type=int, default=256, - help='input batch size for training (default: 256)') - parser.add_argument('--epochs', type=int, default=100, - help='number of epochs to train (default: 100)') - parser.add_argument('--num_workers', type=int, default=0, - help='number of workers (default: 0)') - parser.add_argument('--log_dir', type=str, default="", - help='tensorboard log directory. 
If not specified, ' - 'tensorboard will not be used.') - parser.add_argument('--checkpoint_dir', type=str, default='', - help='directory to save checkpoint') - parser.add_argument('--save_test_dir', type=str, default='', - help='directory to save test submission file') + parser = argparse.ArgumentParser( + description="GNN baselines on pcqm4m with DGL" + ) + parser.add_argument( + "--seed", type=int, default=42, help="random seed to use (default: 42)" + ) + parser.add_argument( + "--device", + type=int, + default=0, + help="which gpu to use if any (default: 0)", + ) + parser.add_argument( + "--gnn", + type=str, + default="gin-virtual", + help="GNN to use, which can be from " + "[gin, gin-virtual, gcn, gcn-virtual] (default: gin-virtual)", + ) + parser.add_argument( + "--graph_pooling", + type=str, + default="sum", + help="graph pooling strategy mean or sum (default: sum)", + ) + parser.add_argument( + "--drop_ratio", type=float, default=0, help="dropout ratio (default: 0)" + ) + parser.add_argument( + "--num_layers", + type=int, + default=5, + help="number of GNN message passing layers (default: 5)", + ) + parser.add_argument( + "--emb_dim", + type=int, + default=600, + help="dimensionality of hidden units in GNNs (default: 600)", + ) + parser.add_argument( + "--train_subset", + action="store_true", + help="use 10% of the training set for training", + ) + parser.add_argument( + "--batch_size", + type=int, + default=256, + help="input batch size for training (default: 256)", + ) + parser.add_argument( + "--epochs", + type=int, + default=100, + help="number of epochs to train (default: 100)", + ) + parser.add_argument( + "--num_workers", + type=int, + default=0, + help="number of workers (default: 0)", + ) + parser.add_argument( + "--log_dir", + type=str, + default="", + help="tensorboard log directory. 
If not specified, " + "tensorboard will not be used.", + ) + parser.add_argument( + "--checkpoint_dir", + type=str, + default="", + help="directory to save checkpoint", + ) + parser.add_argument( + "--save_test_dir", + type=str, + default="", + help="directory to save test submission file", + ) args = parser.parse_args() print(args) @@ -137,7 +195,7 @@ def main(): device = torch.device("cpu") ### automatic dataloading and splitting - dataset = DglPCQM4MDataset(root='dataset/') + dataset = DglPCQM4MDataset(root="dataset/") # split_idx['train'], split_idx['valid'], split_idx['test'] # separately gives a 1D int64 tensor @@ -148,47 +206,77 @@ def main(): if args.train_subset: subset_ratio = 0.1 - subset_idx = torch.randperm(len(split_idx["train"]))[:int(subset_ratio * len(split_idx["train"]))] - train_loader = DataLoader(dataset[split_idx["train"][subset_idx]], batch_size=args.batch_size, shuffle=True, - num_workers=args.num_workers, collate_fn=collate_dgl) + subset_idx = torch.randperm(len(split_idx["train"]))[ + : int(subset_ratio * len(split_idx["train"])) + ] + train_loader = DataLoader( + dataset[split_idx["train"][subset_idx]], + batch_size=args.batch_size, + shuffle=True, + num_workers=args.num_workers, + collate_fn=collate_dgl, + ) else: - train_loader = DataLoader(dataset[split_idx["train"]], batch_size=args.batch_size, shuffle=True, - num_workers=args.num_workers, collate_fn=collate_dgl) - - valid_loader = DataLoader(dataset[split_idx["valid"]], batch_size=args.batch_size, shuffle=False, - num_workers=args.num_workers, collate_fn=collate_dgl) - - if args.save_test_dir != '': - test_loader = DataLoader(dataset[split_idx["test"]], batch_size=args.batch_size, shuffle=False, - num_workers=args.num_workers, collate_fn=collate_dgl) - - if args.checkpoint_dir != '': + train_loader = DataLoader( + dataset[split_idx["train"]], + batch_size=args.batch_size, + shuffle=True, + num_workers=args.num_workers, + collate_fn=collate_dgl, + ) + + valid_loader = DataLoader( + dataset[split_idx["valid"]], + batch_size=args.batch_size, + shuffle=False, + num_workers=args.num_workers, + collate_fn=collate_dgl, + ) + + if args.save_test_dir != "": + test_loader = DataLoader( + dataset[split_idx["test"]], + batch_size=args.batch_size, + shuffle=False, + num_workers=args.num_workers, + collate_fn=collate_dgl, + ) + + if args.checkpoint_dir != "": os.makedirs(args.checkpoint_dir, exist_ok=True) shared_params = { - 'num_layers': args.num_layers, - 'emb_dim': args.emb_dim, - 'drop_ratio': args.drop_ratio, - 'graph_pooling': args.graph_pooling + "num_layers": args.num_layers, + "emb_dim": args.emb_dim, + "drop_ratio": args.drop_ratio, + "graph_pooling": args.graph_pooling, } - if args.gnn == 'gin': - model = GNN(gnn_type='gin', virtual_node=False, **shared_params).to(device) - elif args.gnn == 'gin-virtual': - model = GNN(gnn_type='gin', virtual_node=True, **shared_params).to(device) - elif args.gnn == 'gcn': - model = GNN(gnn_type='gcn', virtual_node=False, **shared_params).to(device) - elif args.gnn == 'gcn-virtual': - model = GNN(gnn_type='gcn', virtual_node=True, **shared_params).to(device) + if args.gnn == "gin": + model = GNN(gnn_type="gin", virtual_node=False, **shared_params).to( + device + ) + elif args.gnn == "gin-virtual": + model = GNN(gnn_type="gin", virtual_node=True, **shared_params).to( + device + ) + elif args.gnn == "gcn": + model = GNN(gnn_type="gcn", virtual_node=False, **shared_params).to( + device + ) + elif args.gnn == "gcn-virtual": + model = GNN(gnn_type="gcn", virtual_node=True, 
**shared_params).to( + device + ) else: - raise ValueError('Invalid GNN type') + raise ValueError("Invalid GNN type") num_params = sum(p.numel() for p in model.parameters()) - print(f'#Params: {num_params}') + print(f"#Params: {num_params}") optimizer = optim.Adam(model.parameters(), lr=0.001) - if args.log_dir != '': + if args.log_dir != "": writer = SummaryWriter(log_dir=args.log_dir) best_valid_mae = 1000 @@ -201,40 +289,50 @@ def main(): for epoch in range(1, args.epochs + 1): print("=====Epoch {}".format(epoch)) - print('Training...') + print("Training...") train_mae = train(model, device, train_loader, optimizer) - print('Evaluating...') + print("Evaluating...") valid_mae = eval(model, device, valid_loader, evaluator) - print({'Train': train_mae, 'Validation': valid_mae}) + print({"Train": train_mae, "Validation": valid_mae}) - if args.log_dir != '': - writer.add_scalar('valid/mae', valid_mae, epoch) - writer.add_scalar('train/mae', train_mae, epoch) + if args.log_dir != "": + writer.add_scalar("valid/mae", valid_mae, epoch) + writer.add_scalar("train/mae", train_mae, epoch) if valid_mae < best_valid_mae: best_valid_mae = valid_mae - if args.checkpoint_dir != '': - print('Saving checkpoint...') - checkpoint = {'epoch': epoch, 'model_state_dict': model.state_dict(), - 'optimizer_state_dict': optimizer.state_dict(), - 'scheduler_state_dict': scheduler.state_dict(), 'best_val_mae': best_valid_mae, - 'num_params': num_params} - torch.save(checkpoint, os.path.join(args.checkpoint_dir, 'checkpoint.pt')) - - if args.save_test_dir != '': - print('Predicting on test data...') + if args.checkpoint_dir != "": + print("Saving checkpoint...") + checkpoint = { + "epoch": epoch, + "model_state_dict": model.state_dict(), + "optimizer_state_dict": optimizer.state_dict(), + "scheduler_state_dict": scheduler.state_dict(), + "best_val_mae": best_valid_mae, + "num_params": num_params, + } + torch.save( + checkpoint, + os.path.join(args.checkpoint_dir, "checkpoint.pt"), + ) + + if args.save_test_dir != "": + print("Predicting on test data...") y_pred = test(model, device, test_loader) - print('Saving test submission file...') - evaluator.save_test_submission({'y_pred': y_pred}, args.save_test_dir) + print("Saving test submission file...") + evaluator.save_test_submission( + {"y_pred": y_pred}, args.save_test_dir + ) scheduler.step() - print(f'Best validation MAE so far: {best_valid_mae}') + print(f"Best validation MAE so far: {best_valid_mae}") - if args.log_dir != '': + if args.log_dir != "": writer.close() + if __name__ == "__main__": main() diff --git a/examples/pytorch/ogb_lsc/PCQM4M/test_inference.py b/examples/pytorch/ogb_lsc/PCQM4M/test_inference.py index 0144cac84231..12a2c07263d5 100644 --- a/examples/pytorch/ogb_lsc/PCQM4M/test_inference.py +++ b/examples/pytorch/ogb_lsc/PCQM4M/test_inference.py @@ -1,33 +1,37 @@ import argparse -import dgl -import numpy as np import os import random -import torch +import numpy as np +import torch +from gnn import GNN from ogb.lsc import PCQM4MDataset, PCQM4MEvaluator from ogb.utils import smiles2graph from torch.utils.data import DataLoader from tqdm import tqdm -from gnn import GNN +import dgl + def collate_dgl(graphs): batched_graph = dgl.batch(graphs) return batched_graph + def test(model, device, loader): model.eval() y_pred = [] for step, bg in enumerate(tqdm(loader, desc="Iteration")): bg = bg.to(device) - x = bg.ndata.pop('feat') - edge_attr = bg.edata.pop('feat') + x = bg.ndata.pop("feat") + edge_attr = bg.edata.pop("feat") with torch.no_grad(): - pred 
= model(bg, x, edge_attr).view(-1, ) + pred = model(bg, x, edge_attr).view( + -1, + ) y_pred.append(pred.detach().cpu()) @@ -43,53 +47,99 @@ def __init__(self, smiles_list, smiles2graph=smiles2graph): self.smiles2graph = smiles2graph def __getitem__(self, idx): - '''Get datapoint with index''' + """Get datapoint with index""" smiles, _ = self.smiles_list[idx] graph = self.smiles2graph(smiles) - dgl_graph = dgl.graph((graph['edge_index'][0], graph['edge_index'][1]), - num_nodes=graph['num_nodes']) - dgl_graph.edata['feat'] = torch.from_numpy(graph['edge_feat']).to(torch.int64) - dgl_graph.ndata['feat'] = torch.from_numpy(graph['node_feat']).to(torch.int64) + dgl_graph = dgl.graph( + (graph["edge_index"][0], graph["edge_index"][1]), + num_nodes=graph["num_nodes"], + ) + dgl_graph.edata["feat"] = torch.from_numpy(graph["edge_feat"]).to( + torch.int64 + ) + dgl_graph.ndata["feat"] = torch.from_numpy(graph["node_feat"]).to( + torch.int64 + ) return dgl_graph def __len__(self): - '''Length of the dataset + """Length of the dataset Returns ------- int Length of Dataset - ''' + """ return len(self.smiles_list) def main(): # Training settings - parser = argparse.ArgumentParser(description='GNN baselines on pcqm4m with DGL') - parser.add_argument('--seed', type=int, default=42, - help='random seed to use (default: 42)') - parser.add_argument('--device', type=int, default=0, - help='which gpu to use if any (default: 0)') - parser.add_argument('--gnn', type=str, default='gin-virtual', - help='GNN to use, which can be from ' - '[gin, gin-virtual, gcn, gcn-virtual] (default: gin-virtual)') - parser.add_argument('--graph_pooling', type=str, default='sum', - help='graph pooling strategy mean or sum (default: sum)') - parser.add_argument('--drop_ratio', type=float, default=0, - help='dropout ratio (default: 0)') - parser.add_argument('--num_layers', type=int, default=5, - help='number of GNN message passing layers (default: 5)') - parser.add_argument('--emb_dim', type=int, default=600, - help='dimensionality of hidden units in GNNs (default: 600)') - parser.add_argument('--batch_size', type=int, default=256, - help='input batch size for training (default: 256)') - parser.add_argument('--num_workers', type=int, default=0, - help='number of workers (default: 0)') - parser.add_argument('--checkpoint_dir', type=str, default='', - help='directory to save checkpoint') - parser.add_argument('--save_test_dir', type=str, default='', - help='directory to save test submission file') + parser = argparse.ArgumentParser( + description="GNN baselines on pcqm4m with DGL" + ) + parser.add_argument( + "--seed", type=int, default=42, help="random seed to use (default: 42)" + ) + parser.add_argument( + "--device", + type=int, + default=0, + help="which gpu to use if any (default: 0)", + ) + parser.add_argument( + "--gnn", + type=str, + default="gin-virtual", + help="GNN to use, which can be from " + "[gin, gin-virtual, gcn, gcn-virtual] (default: gin-virtual)", + ) + parser.add_argument( + "--graph_pooling", + type=str, + default="sum", + help="graph pooling strategy mean or sum (default: sum)", + ) + parser.add_argument( + "--drop_ratio", type=float, default=0, help="dropout ratio (default: 0)" + ) + parser.add_argument( + "--num_layers", + type=int, + default=5, + help="number of GNN message passing layers (default: 5)", + ) + parser.add_argument( + "--emb_dim", + type=int, + default=600, + help="dimensionality of hidden units in GNNs (default: 600)", + ) + parser.add_argument( + "--batch_size", + type=int, + default=256, + 
help="input batch size for training (default: 256)", + ) + parser.add_argument( + "--num_workers", + type=int, + default=0, + help="number of workers (default: 0)", + ) + parser.add_argument( + "--checkpoint_dir", + type=str, + default="", + help="directory to save checkpoint", + ) + parser.add_argument( + "--save_test_dir", + type=str, + default="", + help="directory to save test submission file", + ) args = parser.parse_args() print(args) @@ -106,50 +156,63 @@ def main(): ### automatic data loading and splitting ### Read in the raw SMILES strings - smiles_dataset = PCQM4MDataset(root='dataset/', only_smiles=True) + smiles_dataset = PCQM4MDataset(root="dataset/", only_smiles=True) split_idx = smiles_dataset.get_idx_split() - test_smiles_dataset = [smiles_dataset[i] for i in split_idx['test']] + test_smiles_dataset = [smiles_dataset[i] for i in split_idx["test"]] onthefly_dataset = OnTheFlyPCQMDataset(test_smiles_dataset) - test_loader = DataLoader(onthefly_dataset, batch_size=args.batch_size, shuffle=False, - num_workers=args.num_workers, collate_fn=collate_dgl) + test_loader = DataLoader( + onthefly_dataset, + batch_size=args.batch_size, + shuffle=False, + num_workers=args.num_workers, + collate_fn=collate_dgl, + ) ### automatic evaluator. evaluator = PCQM4MEvaluator() shared_params = { - 'num_layers': args.num_layers, - 'emb_dim': args.emb_dim, - 'drop_ratio': args.drop_ratio, - 'graph_pooling': args.graph_pooling + "num_layers": args.num_layers, + "emb_dim": args.emb_dim, + "drop_ratio": args.drop_ratio, + "graph_pooling": args.graph_pooling, } - if args.gnn == 'gin': - model = GNN(gnn_type='gin', virtual_node=False, **shared_params).to(device) - elif args.gnn == 'gin-virtual': - model = GNN(gnn_type='gin', virtual_node=True, **shared_params).to(device) - elif args.gnn == 'gcn': - model = GNN(gnn_type='gcn', virtual_node=False, **shared_params).to(device) - elif args.gnn == 'gcn-virtual': - model = GNN(gnn_type='gcn', virtual_node=True, **shared_params).to(device) + if args.gnn == "gin": + model = GNN(gnn_type="gin", virtual_node=False, **shared_params).to( + device + ) + elif args.gnn == "gin-virtual": + model = GNN(gnn_type="gin", virtual_node=True, **shared_params).to( + device + ) + elif args.gnn == "gcn": + model = GNN(gnn_type="gcn", virtual_node=False, **shared_params).to( + device + ) + elif args.gnn == "gcn-virtual": + model = GNN(gnn_type="gcn", virtual_node=True, **shared_params).to( + device + ) else: - raise ValueError('Invalid GNN type') + raise ValueError("Invalid GNN type") num_params = sum(p.numel() for p in model.parameters()) - print(f'#Params: {num_params}') + print(f"#Params: {num_params}") - checkpoint_path = os.path.join(args.checkpoint_dir, 'checkpoint.pt') + checkpoint_path = os.path.join(args.checkpoint_dir, "checkpoint.pt") if not os.path.exists(checkpoint_path): - raise RuntimeError(f'Checkpoint file not found at {checkpoint_path}') + raise RuntimeError(f"Checkpoint file not found at {checkpoint_path}") ## reading in checkpoint checkpoint = torch.load(checkpoint_path) - model.load_state_dict(checkpoint['model_state_dict']) + model.load_state_dict(checkpoint["model_state_dict"]) - print('Predicting on test data...') + print("Predicting on test data...") y_pred = test(model, device, test_loader) - print('Saving test submission file...') - evaluator.save_test_submission({'y_pred': y_pred}, args.save_test_dir) + print("Saving test submission file...") + evaluator.save_test_submission({"y_pred": y_pred}, args.save_test_dir) if __name__ == "__main__": diff --git 
a/examples/pytorch/pinsage/builder.py b/examples/pytorch/pinsage/builder.py index 813d48ee32f6..860c3dcf7065 100644 --- a/examples/pytorch/pinsage/builder.py +++ b/examples/pytorch/pinsage/builder.py @@ -1,16 +1,24 @@ """Graph builder from pandas dataframes""" from collections import namedtuple -from pandas.api.types import is_numeric_dtype, is_categorical_dtype, is_categorical + +from pandas.api.types import ( + is_categorical, + is_categorical_dtype, + is_numeric_dtype, +) + import dgl -__all__ = ['PandasGraphBuilder'] +__all__ = ["PandasGraphBuilder"] + def _series_to_tensor(series): if is_categorical(series): - return torch.LongTensor(series.cat.codes.values.astype('int64')) - else: # numeric + return torch.LongTensor(series.cat.codes.values.astype("int64")) + else: # numeric return torch.FloatTensor(series.values) + class PandasGraphBuilder(object): """Creates a heterogeneous graph from multiple pandas dataframes. @@ -60,25 +68,36 @@ class PandasGraphBuilder(object): >>> g.num_edges('plays') 4 """ + def __init__(self): self.entity_tables = {} self.relation_tables = {} - self.entity_pk_to_name = {} # mapping from primary key name to entity name - self.entity_pk = {} # mapping from entity name to primary key - self.entity_key_map = {} # mapping from entity names to primary key values + self.entity_pk_to_name = ( + {} + ) # mapping from primary key name to entity name + self.entity_pk = {} # mapping from entity name to primary key + self.entity_key_map = ( + {} + ) # mapping from entity names to primary key values self.num_nodes_per_type = {} self.edges_per_relation = {} self.relation_name_to_etype = {} - self.relation_src_key = {} # mapping from relation name to source key - self.relation_dst_key = {} # mapping from relation name to destination key + self.relation_src_key = {} # mapping from relation name to source key + self.relation_dst_key = ( + {} + ) # mapping from relation name to destination key def add_entities(self, entity_table, primary_key, name): - entities = entity_table[primary_key].astype('category') + entities = entity_table[primary_key].astype("category") if not (entities.value_counts() == 1).all(): - raise ValueError('Different entity with the same primary key detected.') + raise ValueError( + "Different entity with the same primary key detected." 
+ ) # preserve the category order in the original entity table - entities = entities.cat.reorder_categories(entity_table[primary_key].values) + entities = entities.cat.reorder_categories( + entity_table[primary_key].values + ) self.entity_pk_to_name[primary_key] = name self.entity_pk[name] = primary_key @@ -86,33 +105,47 @@ def add_entities(self, entity_table, primary_key, name): self.entity_key_map[name] = entities self.entity_tables[name] = entity_table - def add_binary_relations(self, relation_table, source_key, destination_key, name): - src = relation_table[source_key].astype('category') + def add_binary_relations( + self, relation_table, source_key, destination_key, name + ): + src = relation_table[source_key].astype("category") src = src.cat.set_categories( - self.entity_key_map[self.entity_pk_to_name[source_key]].cat.categories) - dst = relation_table[destination_key].astype('category') + self.entity_key_map[ + self.entity_pk_to_name[source_key] + ].cat.categories + ) + dst = relation_table[destination_key].astype("category") dst = dst.cat.set_categories( - self.entity_key_map[self.entity_pk_to_name[destination_key]].cat.categories) + self.entity_key_map[ + self.entity_pk_to_name[destination_key] + ].cat.categories + ) if src.isnull().any(): raise ValueError( - 'Some source entities in relation %s do not exist in entity %s.' % - (name, source_key)) + "Some source entities in relation %s do not exist in entity %s." + % (name, source_key) + ) if dst.isnull().any(): raise ValueError( - 'Some destination entities in relation %s do not exist in entity %s.' % - (name, destination_key)) + "Some destination entities in relation %s do not exist in entity %s." + % (name, destination_key) + ) srctype = self.entity_pk_to_name[source_key] dsttype = self.entity_pk_to_name[destination_key] etype = (srctype, name, dsttype) self.relation_name_to_etype[name] = etype - self.edges_per_relation[etype] = (src.cat.codes.values.astype('int64'), dst.cat.codes.values.astype('int64')) + self.edges_per_relation[etype] = ( + src.cat.codes.values.astype("int64"), + dst.cat.codes.values.astype("int64"), + ) self.relation_tables[name] = relation_table self.relation_src_key[name] = source_key self.relation_dst_key[name] = destination_key def build(self): # Create heterograph - graph = dgl.heterograph(self.edges_per_relation, self.num_nodes_per_type) + graph = dgl.heterograph( + self.edges_per_relation, self.num_nodes_per_type + ) return graph - diff --git a/examples/pytorch/pinsage/data_utils.py b/examples/pytorch/pinsage/data_utils.py index b3393968d861..cbb06f8b3157 100644 --- a/examples/pytorch/pinsage/data_utils.py +++ b/examples/pytorch/pinsage/data_utils.py @@ -1,18 +1,21 @@ -import torch -import dgl +import dask.dataframe as dd import numpy as np import scipy.sparse as ssp +import torch import tqdm -import dask.dataframe as dd + +import dgl + # This is the train-test split method most of the recommender system papers running on MovieLens # takes. It essentially follows the intuition of "training on the past and predict the future". # One can also change the threshold to make validation and test set take larger proportions. 
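The comment above describes the chronological leave-one-out split used by most MovieLens-style recommender baselines: for each user, the most recent interaction becomes the test example, the second most recent becomes validation, and everything earlier stays in training. A minimal pandas-only sketch of that idea (hypothetical user_id/timestamp column names, no dask parallelism, purely illustrative):

import numpy as np
import pandas as pd

def chronological_split(df, user_col="user_id", time_col="timestamp"):
    # Start with every interaction in the training set.
    df = df.copy()
    df["train_mask"] = True
    df["val_mask"] = False
    df["test_mask"] = False

    def _split(group):
        group = group.sort_values(time_col)
        if len(group) > 1:
            # Most recent interaction -> test.
            group.iloc[-1, group.columns.get_loc("train_mask")] = False
            group.iloc[-1, group.columns.get_loc("test_mask")] = True
        if len(group) > 2:
            # Second most recent interaction -> validation.
            group.iloc[-2, group.columns.get_loc("train_mask")] = False
            group.iloc[-2, group.columns.get_loc("val_mask")] = True
        return group

    # group_keys=False keeps the original index; sort_index restores row order
    # so the positional indices below refer to the input dataframe.
    df = df.groupby(user_col, group_keys=False).apply(_split).sort_index()
    return (
        np.flatnonzero(df["train_mask"].to_numpy()),
        np.flatnonzero(df["val_mask"].to_numpy()),
        np.flatnonzero(df["test_mask"].to_numpy()),
    )

# Example: three users with 1, 2 and 3 interactions respectively.
ratings = pd.DataFrame(
    {
        "user_id": [0, 1, 1, 2, 2, 2],
        "item_id": [10, 11, 12, 13, 14, 15],
        "timestamp": [5, 1, 2, 1, 2, 3],
    }
)
train_idx, val_idx, test_idx = chronological_split(ratings)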
def train_test_split_by_time(df, timestamp, user): - df['train_mask'] = np.ones((len(df),), dtype=np.bool) - df['val_mask'] = np.zeros((len(df),), dtype=np.bool) - df['test_mask'] = np.zeros((len(df),), dtype=np.bool) + df["train_mask"] = np.ones((len(df),), dtype=np.bool) + df["val_mask"] = np.zeros((len(df),), dtype=np.bool) + df["test_mask"] = np.zeros((len(df),), dtype=np.bool) df = dd.from_pandas(df, npartitions=10) + def train_test_split(df): df = df.sort_values([timestamp]) if df.shape[0] > 1: @@ -22,16 +25,25 @@ def train_test_split(df): df.iloc[-2, -3] = False df.iloc[-2, -2] = True return df - df = df.groupby(user, group_keys=False).apply(train_test_split).compute(scheduler='processes').sort_index() + + df = ( + df.groupby(user, group_keys=False) + .apply(train_test_split) + .compute(scheduler="processes") + .sort_index() + ) print(df[df[user] == df[user].unique()[0]].sort_values(timestamp)) - return df['train_mask'].to_numpy().nonzero()[0], \ - df['val_mask'].to_numpy().nonzero()[0], \ - df['test_mask'].to_numpy().nonzero()[0] + return ( + df["train_mask"].to_numpy().nonzero()[0], + df["val_mask"].to_numpy().nonzero()[0], + df["test_mask"].to_numpy().nonzero()[0], + ) + def build_train_graph(g, train_indices, utype, itype, etype, etype_rev): train_g = g.edge_subgraph( - {etype: train_indices, etype_rev: train_indices}, - relabel_nodes=False) + {etype: train_indices, etype_rev: train_indices}, relabel_nodes=False + ) # copy features for ntype in g.ntypes: @@ -39,10 +51,13 @@ def build_train_graph(g, train_indices, utype, itype, etype, etype_rev): train_g.nodes[ntype].data[col] = data for etype in g.etypes: for col, data in g.edges[etype].data.items(): - train_g.edges[etype].data[col] = data[train_g.edges[etype].data[dgl.EID]] + train_g.edges[etype].data[col] = data[ + train_g.edges[etype].data[dgl.EID] + ] return train_g + def build_val_test_matrix(g, val_indices, test_indices, utype, itype, etype): n_users = g.num_nodes(utype) n_items = g.num_nodes(itype) @@ -52,11 +67,17 @@ def build_val_test_matrix(g, val_indices, test_indices, utype, itype, etype): val_dst = val_dst.numpy() test_src = test_src.numpy() test_dst = test_dst.numpy() - val_matrix = ssp.coo_matrix((np.ones_like(val_src), (val_src, val_dst)), (n_users, n_items)) - test_matrix = ssp.coo_matrix((np.ones_like(test_src), (test_src, test_dst)), (n_users, n_items)) + val_matrix = ssp.coo_matrix( + (np.ones_like(val_src), (val_src, val_dst)), (n_users, n_items) + ) + test_matrix = ssp.coo_matrix( + (np.ones_like(test_src), (test_src, test_dst)), (n_users, n_items) + ) return val_matrix, test_matrix + def linear_normalize(values): - return (values - values.min(0, keepdims=True)) / \ - (values.max(0, keepdims=True) - values.min(0, keepdims=True)) + return (values - values.min(0, keepdims=True)) / ( + values.max(0, keepdims=True) - values.min(0, keepdims=True) + ) diff --git a/examples/pytorch/pinsage/evaluation.py b/examples/pytorch/pinsage/evaluation.py index 2b6468a6145f..70d67a2c7e2b 100644 --- a/examples/pytorch/pinsage/evaluation.py +++ b/examples/pytorch/pinsage/evaluation.py @@ -1,8 +1,11 @@ +import argparse +import pickle + import numpy as np import torch -import pickle + import dgl -import argparse + def prec(recommendations, ground_truth): n_users, n_items = ground_truth.shape @@ -13,8 +16,11 @@ def prec(recommendations, ground_truth): hit = relevance.any(axis=1).mean() return hit + class LatestNNRecommender(object): - def __init__(self, user_ntype, item_ntype, user_to_item_etype, timestamp, batch_size): + def 
__init__( + self, user_ntype, item_ntype, user_to_item_etype, timestamp, batch_size + ): self.user_ntype = user_ntype self.item_ntype = item_ntype self.user_to_item_etype = user_to_item_etype @@ -27,19 +33,27 @@ def recommend(self, full_graph, K, h_user, h_item): """ graph_slice = full_graph.edge_type_subgraph([self.user_to_item_etype]) n_users = full_graph.num_nodes(self.user_ntype) - latest_interactions = dgl.sampling.select_topk(graph_slice, 1, self.timestamp, edge_dir='out') - user, latest_items = latest_interactions.all_edges(form='uv', order='srcdst') + latest_interactions = dgl.sampling.select_topk( + graph_slice, 1, self.timestamp, edge_dir="out" + ) + user, latest_items = latest_interactions.all_edges( + form="uv", order="srcdst" + ) # each user should have at least one "latest" interaction assert torch.equal(user, torch.arange(n_users)) recommended_batches = [] user_batches = torch.arange(n_users).split(self.batch_size) for user_batch in user_batches: - latest_item_batch = latest_items[user_batch].to(device=h_item.device) + latest_item_batch = latest_items[user_batch].to( + device=h_item.device + ) dist = h_item[latest_item_batch] @ h_item.t() # exclude items that are already interacted for i, u in enumerate(user_batch.tolist()): - interacted_items = full_graph.successors(u, etype=self.user_to_item_etype) + interacted_items = full_graph.successors( + u, etype=self.user_to_item_etype + ) dist[i, interacted_items] = -np.inf recommended_batches.append(dist.topk(K, 1)[1]) @@ -48,31 +62,33 @@ def recommend(self, full_graph, K, h_user, h_item): def evaluate_nn(dataset, h_item, k, batch_size): - g = dataset['train-graph'] - val_matrix = dataset['val-matrix'].tocsr() - test_matrix = dataset['test-matrix'].tocsr() - item_texts = dataset['item-texts'] - user_ntype = dataset['user-type'] - item_ntype = dataset['item-type'] - user_to_item_etype = dataset['user-to-item-type'] - timestamp = dataset['timestamp-edge-column'] + g = dataset["train-graph"] + val_matrix = dataset["val-matrix"].tocsr() + test_matrix = dataset["test-matrix"].tocsr() + item_texts = dataset["item-texts"] + user_ntype = dataset["user-type"] + item_ntype = dataset["item-type"] + user_to_item_etype = dataset["user-to-item-type"] + timestamp = dataset["timestamp-edge-column"] rec_engine = LatestNNRecommender( - user_ntype, item_ntype, user_to_item_etype, timestamp, batch_size) + user_ntype, item_ntype, user_to_item_etype, timestamp, batch_size + ) recommendations = rec_engine.recommend(g, k, None, h_item).cpu().numpy() return prec(recommendations, val_matrix) -if __name__ == '__main__': + +if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument('dataset_path', type=str) - parser.add_argument('item_embedding_path', type=str) - parser.add_argument('-k', type=int, default=10) - parser.add_argument('--batch-size', type=int, default=32) + parser.add_argument("dataset_path", type=str) + parser.add_argument("item_embedding_path", type=str) + parser.add_argument("-k", type=int, default=10) + parser.add_argument("--batch-size", type=int, default=32) args = parser.parse_args() - with open(args.dataset_path, 'rb') as f: + with open(args.dataset_path, "rb") as f: dataset = pickle.load(f) - with open(args.item_embedding_path, 'rb') as f: + with open(args.item_embedding_path, "rb") as f: emb = torch.FloatTensor(pickle.load(f)) print(evaluate_nn(dataset, emb, args.k, args.batch_size)) diff --git a/examples/pytorch/pinsage/layers.py b/examples/pytorch/pinsage/layers.py index 76a6fde2206c..48038d1cc820 100644 --- 
a/examples/pytorch/pinsage/layers.py +++ b/examples/pytorch/pinsage/layers.py @@ -1,14 +1,17 @@ import torch import torch.nn as nn import torch.nn.functional as F + import dgl -import dgl.nn.pytorch as dglnn import dgl.function as fn +import dgl.nn.pytorch as dglnn + def disable_grad(module): for param in module.parameters(): param.requires_grad = False + def _init_input_modules(g, ntype, textset, hidden_dims): # We initialize the linear projections of each input feature ``x`` as # follows: @@ -30,44 +33,50 @@ def _init_input_modules(g, ntype, textset, hidden_dims): module_dict[column] = m elif data.dtype == torch.int64: assert data.ndim == 1 - m = nn.Embedding( - data.max() + 2, hidden_dims, padding_idx=-1) + m = nn.Embedding(data.max() + 2, hidden_dims, padding_idx=-1) nn.init.xavier_uniform_(m.weight) module_dict[column] = m if textset is not None: for column, field in textset.items(): - textlist, vocab, pad_var, batch_first = field + textlist, vocab, pad_var, batch_first = field module_dict[column] = BagOfWords(vocab, hidden_dims) return module_dict + class BagOfWords(nn.Module): def __init__(self, vocab, hidden_dims): super().__init__() self.emb = nn.Embedding( - len(vocab.get_itos()), hidden_dims, - padding_idx=vocab.get_stoi()['']) + len(vocab.get_itos()), + hidden_dims, + padding_idx=vocab.get_stoi()[""], + ) nn.init.xavier_uniform_(self.emb.weight) def forward(self, x, length): return self.emb(x).sum(1) / length.unsqueeze(1).float() + class LinearProjector(nn.Module): """ Projects each input feature of the graph linearly and sums them up """ + def __init__(self, full_graph, ntype, textset, hidden_dims): super().__init__() self.ntype = ntype - self.inputs = _init_input_modules(full_graph, ntype, textset, hidden_dims) + self.inputs = _init_input_modules( + full_graph, ntype, textset, hidden_dims + ) def forward(self, ndata): projections = [] for feature, data in ndata.items(): - if feature == dgl.NID or feature.endswith('__len'): + if feature == dgl.NID or feature.endswith("__len"): # This is an additional feature indicating the length of the ``feature`` # column; we shouldn't process this. continue @@ -75,7 +84,7 @@ def forward(self, ndata): module = self.inputs[feature] if isinstance(module, BagOfWords): # Textual feature; find the length and pass it to the textual module. 
- length = ndata[feature + '__len'] + length = ndata[feature + "__len"] result = module(data, length) else: result = module(data) @@ -83,6 +92,7 @@ def forward(self, ndata): return torch.stack(projections, 1).sum(1) + class WeightedSAGEConv(nn.Module): def __init__(self, input_dims, hidden_dims, output_dims, act=F.relu): super().__init__() @@ -94,7 +104,7 @@ def __init__(self, input_dims, hidden_dims, output_dims, act=F.relu): self.dropout = nn.Dropout(0.5) def reset_parameters(self): - gain = nn.init.calculate_gain('relu') + gain = nn.init.calculate_gain("relu") nn.init.xavier_uniform_(self.Q.weight, gain=gain) nn.init.xavier_uniform_(self.W.weight, gain=gain) nn.init.constant_(self.Q.bias, 0) @@ -108,18 +118,21 @@ def forward(self, g, h, weights): """ h_src, h_dst = h with g.local_scope(): - g.srcdata['n'] = self.act(self.Q(self.dropout(h_src))) - g.edata['w'] = weights.float() - g.update_all(fn.u_mul_e('n', 'w', 'm'), fn.sum('m', 'n')) - g.update_all(fn.copy_e('w', 'm'), fn.sum('m', 'ws')) - n = g.dstdata['n'] - ws = g.dstdata['ws'].unsqueeze(1).clamp(min=1) + g.srcdata["n"] = self.act(self.Q(self.dropout(h_src))) + g.edata["w"] = weights.float() + g.update_all(fn.u_mul_e("n", "w", "m"), fn.sum("m", "n")) + g.update_all(fn.copy_e("w", "m"), fn.sum("m", "ws")) + n = g.dstdata["n"] + ws = g.dstdata["ws"].unsqueeze(1).clamp(min=1) z = self.act(self.W(self.dropout(torch.cat([n / ws, h_dst], 1)))) z_norm = z.norm(2, 1, keepdim=True) - z_norm = torch.where(z_norm == 0, torch.tensor(1.).to(z_norm), z_norm) + z_norm = torch.where( + z_norm == 0, torch.tensor(1.0).to(z_norm), z_norm + ) z = z / z_norm return z + class SAGENet(nn.Module): def __init__(self, hidden_dims, n_layers): """ @@ -133,14 +146,17 @@ def __init__(self, hidden_dims, n_layers): self.convs = nn.ModuleList() for _ in range(n_layers): - self.convs.append(WeightedSAGEConv(hidden_dims, hidden_dims, hidden_dims)) + self.convs.append( + WeightedSAGEConv(hidden_dims, hidden_dims, hidden_dims) + ) def forward(self, blocks, h): for layer, block in zip(self.convs, blocks): - h_dst = h[:block.num_nodes('DST/' + block.ntypes[0])] - h = layer(block, (h, h_dst), block.edata['weights']) + h_dst = h[: block.num_nodes("DST/" + block.ntypes[0])] + h = layer(block, (h, h_dst), block.edata["weights"]) return h + class ItemToItemScorer(nn.Module): def __init__(self, full_graph, ntype): super().__init__() @@ -151,7 +167,7 @@ def __init__(self, full_graph, ntype): def _add_bias(self, edges): bias_src = self.bias[edges.src[dgl.NID]] bias_dst = self.bias[edges.dst[dgl.NID]] - return {'s': edges.data['s'] + bias_src + bias_dst} + return {"s": edges.data["s"] + bias_src + bias_dst} def forward(self, item_item_graph, h): """ @@ -159,8 +175,8 @@ def forward(self, item_item_graph, h): h : hidden state of every node """ with item_item_graph.local_scope(): - item_item_graph.ndata['h'] = h - item_item_graph.apply_edges(fn.u_dot_v('h', 'h', 's')) + item_item_graph.ndata["h"] = h + item_item_graph.apply_edges(fn.u_dot_v("h", "h", "s")) item_item_graph.apply_edges(self._add_bias) - pair_score = item_item_graph.edata['s'] + pair_score = item_item_graph.edata["s"] return pair_score diff --git a/examples/pytorch/pinsage/model_sparse.py b/examples/pytorch/pinsage/model_sparse.py index 4928e9c24753..77a91a3ec36d 100644 --- a/examples/pytorch/pinsage/model_sparse.py +++ b/examples/pytorch/pinsage/model_sparse.py @@ -1,25 +1,29 @@ -import pickle import argparse +import os +import pickle + +import evaluation +import layers import numpy as np +import sampler as 
sampler_module import torch import torch.nn as nn -from torch.utils.data import DataLoader import torchtext -import dgl -import os import tqdm - -import layers -import sampler as sampler_module -import evaluation +from torch.utils.data import DataLoader from torchtext.data.utils import get_tokenizer from torchtext.vocab import build_vocab_from_iterator +import dgl + + class PinSAGEModel(nn.Module): def __init__(self, full_graph, ntype, textsets, hidden_dims, n_layers): super().__init__() - self.proj = layers.LinearProjector(full_graph, ntype, textsets, hidden_dims) + self.proj = layers.LinearProjector( + full_graph, ntype, textsets, hidden_dims + ) self.sage = layers.SAGENet(hidden_dims, n_layers) self.scorer = layers.ItemToItemScorer(full_graph, ntype) @@ -36,19 +40,22 @@ def get_repr(self, blocks, item_emb): # add to the item embedding itself h_item = h_item + item_emb(blocks[0].srcdata[dgl.NID].cpu()).to(h_item) - h_item_dst = h_item_dst + item_emb(blocks[-1].dstdata[dgl.NID].cpu()).to(h_item_dst) + h_item_dst = h_item_dst + item_emb( + blocks[-1].dstdata[dgl.NID].cpu() + ).to(h_item_dst) return h_item_dst + self.sage(blocks, h_item) + def train(dataset, args): - g = dataset['train-graph'] - val_matrix = dataset['val-matrix'].tocsr() - test_matrix = dataset['test-matrix'].tocsr() - item_texts = dataset['item-texts'] - user_ntype = dataset['user-type'] - item_ntype = dataset['item-type'] - user_to_item_etype = dataset['user-to-item-type'] - timestamp = dataset['timestamp-edge-column'] + g = dataset["train-graph"] + val_matrix = dataset["val-matrix"].tocsr() + test_matrix = dataset["test-matrix"].tocsr() + item_texts = dataset["item-texts"] + user_ntype = dataset["user-type"] + item_ntype = dataset["item-type"] + user_to_item_etype = dataset["user-to-item-type"] + timestamp = dataset["timestamp-edge-column"] device = torch.device(args.device) @@ -64,31 +71,53 @@ def train(dataset, args): l = tokenizer(item_texts[key][i].lower()) textlist.append(l) for key, field in item_texts.items(): - vocab2 = build_vocab_from_iterator(textlist, specials=["",""]) - textset[key] = (textlist, vocab2, vocab2.get_stoi()[''], batch_first) + vocab2 = build_vocab_from_iterator( + textlist, specials=["", ""] + ) + textset[key] = ( + textlist, + vocab2, + vocab2.get_stoi()[""], + batch_first, + ) # Sampler batch_sampler = sampler_module.ItemToItemBatchSampler( - g, user_ntype, item_ntype, args.batch_size) + g, user_ntype, item_ntype, args.batch_size + ) neighbor_sampler = sampler_module.NeighborSampler( - g, user_ntype, item_ntype, args.random_walk_length, - args.random_walk_restart_prob, args.num_random_walks, args.num_neighbors, - args.num_layers) - collator = sampler_module.PinSAGECollator(neighbor_sampler, g, item_ntype, textset) + g, + user_ntype, + item_ntype, + args.random_walk_length, + args.random_walk_restart_prob, + args.num_random_walks, + args.num_neighbors, + args.num_layers, + ) + collator = sampler_module.PinSAGECollator( + neighbor_sampler, g, item_ntype, textset + ) dataloader = DataLoader( batch_sampler, collate_fn=collator.collate_train, - num_workers=args.num_workers) + num_workers=args.num_workers, + ) dataloader_test = DataLoader( torch.arange(g.num_nodes(item_ntype)), batch_size=args.batch_size, collate_fn=collator.collate_test, - num_workers=args.num_workers) + num_workers=args.num_workers, + ) dataloader_it = iter(dataloader) # Model - model = PinSAGEModel(g, item_ntype, textset, args.hidden_dims, args.num_layers).to(device) - item_emb = nn.Embedding(g.num_nodes(item_ntype), 
args.hidden_dims, sparse=True) + model = PinSAGEModel( + g, item_ntype, textset, args.hidden_dims, args.num_layers + ).to(device) + item_emb = nn.Embedding( + g.num_nodes(item_ntype), args.hidden_dims, sparse=True + ) # Optimizer opt = torch.optim.Adam(model.parameters(), lr=args.lr) opt_emb = torch.optim.SparseAdam(item_emb.parameters(), lr=args.lr) @@ -114,7 +143,9 @@ def train(dataset, args): # Evaluate model.eval() with torch.no_grad(): - item_batches = torch.arange(g.num_nodes(item_ntype)).split(args.batch_size) + item_batches = torch.arange(g.num_nodes(item_ntype)).split( + args.batch_size + ) h_item_batches = [] for blocks in tqdm.tqdm(dataloader_test): for i in range(len(blocks)): @@ -123,32 +154,37 @@ def train(dataset, args): h_item_batches.append(model.get_repr(blocks, item_emb)) h_item = torch.cat(h_item_batches, 0) - print(evaluation.evaluate_nn(dataset, h_item, args.k, args.batch_size)) + print( + evaluation.evaluate_nn(dataset, h_item, args.k, args.batch_size) + ) + -if __name__ == '__main__': +if __name__ == "__main__": # Arguments parser = argparse.ArgumentParser() - parser.add_argument('dataset_path', type=str) - parser.add_argument('--random-walk-length', type=int, default=2) - parser.add_argument('--random-walk-restart-prob', type=float, default=0.5) - parser.add_argument('--num-random-walks', type=int, default=10) - parser.add_argument('--num-neighbors', type=int, default=3) - parser.add_argument('--num-layers', type=int, default=2) - parser.add_argument('--hidden-dims', type=int, default=16) - parser.add_argument('--batch-size', type=int, default=32) - parser.add_argument('--device', type=str, default='cpu') # can also be "cuda:0" - parser.add_argument('--num-epochs', type=int, default=1) - parser.add_argument('--batches-per-epoch', type=int, default=20000) - parser.add_argument('--num-workers', type=int, default=0) - parser.add_argument('--lr', type=float, default=3e-5) - parser.add_argument('-k', type=int, default=10) + parser.add_argument("dataset_path", type=str) + parser.add_argument("--random-walk-length", type=int, default=2) + parser.add_argument("--random-walk-restart-prob", type=float, default=0.5) + parser.add_argument("--num-random-walks", type=int, default=10) + parser.add_argument("--num-neighbors", type=int, default=3) + parser.add_argument("--num-layers", type=int, default=2) + parser.add_argument("--hidden-dims", type=int, default=16) + parser.add_argument("--batch-size", type=int, default=32) + parser.add_argument( + "--device", type=str, default="cpu" + ) # can also be "cuda:0" + parser.add_argument("--num-epochs", type=int, default=1) + parser.add_argument("--batches-per-epoch", type=int, default=20000) + parser.add_argument("--num-workers", type=int, default=0) + parser.add_argument("--lr", type=float, default=3e-5) + parser.add_argument("-k", type=int, default=10) args = parser.parse_args() # Load dataset - data_info_path = os.path.join(args.dataset_path, 'data.pkl') - with open(data_info_path, 'rb') as f: + data_info_path = os.path.join(args.dataset_path, "data.pkl") + with open(data_info_path, "rb") as f: dataset = pickle.load(f) - train_g_path = os.path.join(args.dataset_path, 'train_g.bin') + train_g_path = os.path.join(args.dataset_path, "train_g.bin") g_list, _ = dgl.load_graphs(train_g_path) - dataset['train-graph'] = g_list[0] + dataset["train-graph"] = g_list[0] train(dataset, args)
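For reference, the edge scoring performed by ItemToItemScorer above amounts to a dot product between the two endpoint item embeddings plus per-item bias terms, evaluated over an item-item pair graph with dgl.function.u_dot_v. A simplified standalone sketch of that computation (toy graph, random embeddings and zero biases, all names illustrative):

import torch

import dgl
import dgl.function as fn

num_items, hidden_dims = 5, 16
# A toy item-item pair graph: four candidate (head, tail) pairs to score.
pair_graph = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]), num_nodes=num_items)
h_item = torch.randn(num_items, hidden_dims)  # item representations from the GNN
bias = torch.zeros(num_items)  # per-item bias, a learnable parameter in the example

with pair_graph.local_scope():
    pair_graph.ndata["h"] = h_item
    # s[e] = <h[src(e)], h[dst(e)]> for every edge e in the pair graph.
    pair_graph.apply_edges(fn.u_dot_v("h", "h", "s"))
    src, dst = pair_graph.edges()
    pair_score = pair_graph.edata["s"].squeeze(-1) + bias[src] + bias[dst]

print(pair_score.shape)  # torch.Size([4])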