[Misc] Black auto fix. (dmlc#4652)
Co-authored-by: Steve <[email protected]>
frozenbugs and Steve authored Sep 28, 2022
1 parent f19f05c commit 0b9df9d
Showing 99 changed files with 8,585 additions and 4,228 deletions.
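Every hunk in this commit is mechanical restyling of the kind Black applies: double quotes, spaces around operators, long calls exploded one argument per line. For reference, a minimal sketch of reproducing the restyling with Black's programmatic API (format_str and Mode are Black's documented entry points; the sample source is lifted from the main.py hunk below, and the default Mode() line length is assumed rather than whatever this repository pins):

import black

# Two pre-change lines from main.py below; Black adds spaces around "="
# and normalizes string literals to double quotes.
src = "g=data_generator.g\ndevice = 'cpu'\n"

formatted = black.format_str(src, mode=black.Mode())
print(formatted)
# g = data_generator.g
# device = "cpu"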
115 changes: 76 additions & 39 deletions examples/pytorch/NGCF/NGCF/main.py
@@ -1,23 +1,27 @@
+import os
+from time import time
+
 import torch
 import torch.optim as optim
 from model import NGCF
 from utility.batch_test import *
 from utility.helper import early_stopping
-from time import time
-import os
 
 
 def main(args):
     # Step 1: Prepare graph data and device ================================================================= #
     if args.gpu >= 0 and torch.cuda.is_available():
-        device = 'cuda:{}'.format(args.gpu)
+        device = "cuda:{}".format(args.gpu)
     else:
-        device = 'cpu'
+        device = "cpu"
 
-    g=data_generator.g
-    g=g.to(device)
+    g = data_generator.g
+    g = g.to(device)
 
     # Step 2: Create model and training components=========================================================== #
-    model = NGCF(g, args.embed_size, args.layer_size, args.mess_dropout, args.regs[0]).to(device)
+    model = NGCF(
+        g, args.embed_size, args.layer_size, args.mess_dropout, args.regs[0]
+    ).to(device)
     optimizer = optim.Adam(model.parameters(), lr=args.lr)
 
     # Step 3: training epoches ============================================================================== #
Expand All @@ -27,62 +31,89 @@ def main(args):
     loss_loger, pre_loger, rec_loger, ndcg_loger, hit_loger = [], [], [], [], []
     for epoch in range(args.epoch):
         t1 = time()
-        loss, mf_loss, emb_loss = 0., 0., 0.
+        loss, mf_loss, emb_loss = 0.0, 0.0, 0.0
         for idx in range(n_batch):
             users, pos_items, neg_items = data_generator.sample()
-            u_g_embeddings, pos_i_g_embeddings, neg_i_g_embeddings = model(g, 'user', 'item', users,
-                                                                           pos_items,
-                                                                           neg_items)
+            u_g_embeddings, pos_i_g_embeddings, neg_i_g_embeddings = model(
+                g, "user", "item", users, pos_items, neg_items
+            )
 
-            batch_loss, batch_mf_loss, batch_emb_loss = model.create_bpr_loss(u_g_embeddings,
-                                                                              pos_i_g_embeddings,
-                                                                              neg_i_g_embeddings)
+            batch_loss, batch_mf_loss, batch_emb_loss = model.create_bpr_loss(
+                u_g_embeddings, pos_i_g_embeddings, neg_i_g_embeddings
+            )
             optimizer.zero_grad()
             batch_loss.backward()
             optimizer.step()
 
             loss += batch_loss
             mf_loss += batch_mf_loss
             emb_loss += batch_emb_loss
 
-
         if (epoch + 1) % 10 != 0:
             if args.verbose > 0 and epoch % args.verbose == 0:
-                perf_str = 'Epoch %d [%.1fs]: train==[%.5f=%.5f + %.5f]' % (
-                    epoch, time() - t1, loss, mf_loss, emb_loss)
+                perf_str = "Epoch %d [%.1fs]: train==[%.5f=%.5f + %.5f]" % (
+                    epoch,
+                    time() - t1,
+                    loss,
+                    mf_loss,
+                    emb_loss,
+                )
                 print(perf_str)
-            continue #end the current epoch and move to the next epoch, let the following evaluation run every 10 epoches
+            continue  # end the current epoch and move to the next epoch, let the following evaluation run every 10 epoches
 
-        #evaluate the model every 10 epoches
+        # evaluate the model every 10 epoches
         t2 = time()
         users_to_test = list(data_generator.test_set.keys())
         ret = test(model, g, users_to_test)
         t3 = time()
 
         loss_loger.append(loss)
-        rec_loger.append(ret['recall'])
-        pre_loger.append(ret['precision'])
-        ndcg_loger.append(ret['ndcg'])
-        hit_loger.append(ret['hit_ratio'])
+        rec_loger.append(ret["recall"])
+        pre_loger.append(ret["precision"])
+        ndcg_loger.append(ret["ndcg"])
+        hit_loger.append(ret["hit_ratio"])
 
         if args.verbose > 0:
-            perf_str = 'Epoch %d [%.1fs + %.1fs]: train==[%.5f=%.5f + %.5f], recall=[%.5f, %.5f], ' \
-                       'precision=[%.5f, %.5f], hit=[%.5f, %.5f], ndcg=[%.5f, %.5f]' % \
-                       (epoch, t2 - t1, t3 - t2, loss, mf_loss, emb_loss, ret['recall'][0], ret['recall'][-1],
-                        ret['precision'][0], ret['precision'][-1], ret['hit_ratio'][0], ret['hit_ratio'][-1],
-                        ret['ndcg'][0], ret['ndcg'][-1])
+            perf_str = (
+                "Epoch %d [%.1fs + %.1fs]: train==[%.5f=%.5f + %.5f], recall=[%.5f, %.5f], "
+                "precision=[%.5f, %.5f], hit=[%.5f, %.5f], ndcg=[%.5f, %.5f]"
+                % (
+                    epoch,
+                    t2 - t1,
+                    t3 - t2,
+                    loss,
+                    mf_loss,
+                    emb_loss,
+                    ret["recall"][0],
+                    ret["recall"][-1],
+                    ret["precision"][0],
+                    ret["precision"][-1],
+                    ret["hit_ratio"][0],
+                    ret["hit_ratio"][-1],
+                    ret["ndcg"][0],
+                    ret["ndcg"][-1],
+                )
+            )
             print(perf_str)
 
-        cur_best_pre_0, stopping_step, should_stop = early_stopping(ret['recall'][0], cur_best_pre_0,
-                                                                    stopping_step, expected_order='acc', flag_step=5)
+        cur_best_pre_0, stopping_step, should_stop = early_stopping(
+            ret["recall"][0],
+            cur_best_pre_0,
+            stopping_step,
+            expected_order="acc",
+            flag_step=5,
+        )
 
         # early stop
         if should_stop == True:
             break
 
-        if ret['recall'][0] == cur_best_pre_0 and args.save_flag == 1:
+        if ret["recall"][0] == cur_best_pre_0 and args.save_flag == 1:
             torch.save(model.state_dict(), args.weights_path + args.model_name)
-            print('save the weights in path: ', args.weights_path + args.model_name)
+            print(
+                "save the weights in path: ",
+                args.weights_path + args.model_name,
+            )
 
     recs = np.array(rec_loger)
     pres = np.array(pre_loger)
@@ -92,19 +123,25 @@ def main(args):
     best_rec_0 = max(recs[:, 0])
     idx = list(recs[:, 0]).index(best_rec_0)
 
-    final_perf = "Best Iter=[%d]@[%.1f]\trecall=[%s], precision=[%s], hit=[%s], ndcg=[%s]" % \
-                 (idx, time() - t0, '\t'.join(['%.5f' % r for r in recs[idx]]),
-                  '\t'.join(['%.5f' % r for r in pres[idx]]),
-                  '\t'.join(['%.5f' % r for r in hit[idx]]),
-                  '\t'.join(['%.5f' % r for r in ndcgs[idx]]))
+    final_perf = (
+        "Best Iter=[%d]@[%.1f]\trecall=[%s], precision=[%s], hit=[%s], ndcg=[%s]"
+        % (
+            idx,
+            time() - t0,
+            "\t".join(["%.5f" % r for r in recs[idx]]),
+            "\t".join(["%.5f" % r for r in pres[idx]]),
+            "\t".join(["%.5f" % r for r in hit[idx]]),
+            "\t".join(["%.5f" % r for r in ndcgs[idx]]),
+        )
+    )
     print(final_perf)
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     if not os.path.exists(args.weights_path):
         os.mkdir(args.weights_path)
     args.mess_dropout = eval(args.mess_dropout)
     args.layer_size = eval(args.layer_size)
     args.regs = eval(args.regs)
     print(args)
     main(args)
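
For context on the training loop above: early_stopping comes from utility/helper.py and is untouched by this commit. A hedged sketch of the usual pattern behind that call signature, not the repository's exact implementation:

def early_stopping(
    cur_value, best_value, stopping_step, expected_order="acc", flag_step=5
):
    # "acc": the metric should increase; "dec": it should decrease.
    improved = (expected_order == "acc" and cur_value >= best_value) or (
        expected_order == "dec" and cur_value <= best_value
    )
    if improved:
        stopping_step = 0
        best_value = cur_value
    else:
        stopping_step += 1
    # Stop once the metric has failed to improve flag_step evaluations in a row.
    should_stop = stopping_step >= flag_step
    return best_value, stopping_step, should_stop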

104 changes: 70 additions & 34 deletions examples/pytorch/NGCF/NGCF/model.py
@@ -1,85 +1,117 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
+
 import dgl.function as fn
 
 
 class NGCFLayer(nn.Module):
     def __init__(self, in_size, out_size, norm_dict, dropout):
         super(NGCFLayer, self).__init__()
         self.in_size = in_size
         self.out_size = out_size
 
-        #weights for different types of messages
-        self.W1 = nn.Linear(in_size, out_size, bias = True)
-        self.W2 = nn.Linear(in_size, out_size, bias = True)
+        # weights for different types of messages
+        self.W1 = nn.Linear(in_size, out_size, bias=True)
+        self.W2 = nn.Linear(in_size, out_size, bias=True)
 
-        #leaky relu
+        # leaky relu
         self.leaky_relu = nn.LeakyReLU(0.2)
 
-        #dropout layer
+        # dropout layer
         self.dropout = nn.Dropout(dropout)
 
-        #initialization
+        # initialization
         torch.nn.init.xavier_uniform_(self.W1.weight)
         torch.nn.init.constant_(self.W1.bias, 0)
         torch.nn.init.xavier_uniform_(self.W2.weight)
         torch.nn.init.constant_(self.W2.bias, 0)
 
-        #norm
+        # norm
         self.norm_dict = norm_dict
 
     def forward(self, g, feat_dict):
-
-        funcs = {} #message and reduce functions dict
-        #for each type of edges, compute messages and reduce them all
+        funcs = {}  # message and reduce functions dict
+        # for each type of edges, compute messages and reduce them all
         for srctype, etype, dsttype in g.canonical_etypes:
-            if srctype == dsttype: #for self loops
+            if srctype == dsttype:  # for self loops
                 messages = self.W1(feat_dict[srctype])
-                g.nodes[srctype].data[etype] = messages #store in ndata
-                funcs[(srctype, etype, dsttype)] = (fn.copy_u(etype, 'm'), fn.sum('m', 'h')) #define message and reduce functions
+                g.nodes[srctype].data[etype] = messages  # store in ndata
+                funcs[(srctype, etype, dsttype)] = (
+                    fn.copy_u(etype, "m"),
+                    fn.sum("m", "h"),
+                )  # define message and reduce functions
             else:
                 src, dst = g.edges(etype=(srctype, etype, dsttype))
                 norm = self.norm_dict[(srctype, etype, dsttype)]
-                messages = norm * (self.W1(feat_dict[srctype][src]) + self.W2(feat_dict[srctype][src]*feat_dict[dsttype][dst])) #compute messages
-                g.edges[(srctype, etype, dsttype)].data[etype] = messages #store in edata
-                funcs[(srctype, etype, dsttype)] = (fn.copy_e(etype, 'm'), fn.sum('m', 'h')) #define message and reduce functions
-
-        g.multi_update_all(funcs, 'sum') #update all, reduce by first type-wisely then across different types
-        feature_dict={}
+                messages = norm * (
+                    self.W1(feat_dict[srctype][src])
+                    + self.W2(feat_dict[srctype][src] * feat_dict[dsttype][dst])
+                )  # compute messages
+                g.edges[(srctype, etype, dsttype)].data[
+                    etype
+                ] = messages  # store in edata
+                funcs[(srctype, etype, dsttype)] = (
+                    fn.copy_e(etype, "m"),
+                    fn.sum("m", "h"),
+                )  # define message and reduce functions
+
+        g.multi_update_all(
+            funcs, "sum"
+        )  # update all, reduce by first type-wisely then across different types
+        feature_dict = {}
         for ntype in g.ntypes:
-            h = self.leaky_relu(g.nodes[ntype].data['h']) #leaky relu
-            h = self.dropout(h) #dropout
-            h = F.normalize(h,dim=1,p=2) #l2 normalize
+            h = self.leaky_relu(g.nodes[ntype].data["h"])  # leaky relu
+            h = self.dropout(h)  # dropout
+            h = F.normalize(h, dim=1, p=2)  # l2 normalize
             feature_dict[ntype] = h
         return feature_dict
 
 
 class NGCF(nn.Module):
     def __init__(self, g, in_size, layer_size, dropout, lmbd=1e-5):
         super(NGCF, self).__init__()
         self.lmbd = lmbd
         self.norm_dict = dict()
         for srctype, etype, dsttype in g.canonical_etypes:
             src, dst = g.edges(etype=(srctype, etype, dsttype))
-            dst_degree = g.in_degrees(dst, etype=(srctype, etype, dsttype)).float() #obtain degrees
-            src_degree = g.out_degrees(src, etype=(srctype, etype, dsttype)).float()
-            norm = torch.pow(src_degree * dst_degree, -0.5).unsqueeze(1) #compute norm
+            dst_degree = g.in_degrees(
+                dst, etype=(srctype, etype, dsttype)
+            ).float()  # obtain degrees
+            src_degree = g.out_degrees(
+                src, etype=(srctype, etype, dsttype)
+            ).float()
+            norm = torch.pow(src_degree * dst_degree, -0.5).unsqueeze(
+                1
+            )  # compute norm
             self.norm_dict[(srctype, etype, dsttype)] = norm
 
         self.layers = nn.ModuleList()
         self.layers.append(
             NGCFLayer(in_size, layer_size[0], self.norm_dict, dropout[0])
         )
         self.num_layers = len(layer_size)
-        for i in range(self.num_layers-1):
+        for i in range(self.num_layers - 1):
             self.layers.append(
-                NGCFLayer(layer_size[i], layer_size[i+1], self.norm_dict, dropout[i+1])
+                NGCFLayer(
+                    layer_size[i],
+                    layer_size[i + 1],
+                    self.norm_dict,
+                    dropout[i + 1],
+                )
             )
         self.initializer = nn.init.xavier_uniform_
 
-        #embeddings for different types of nodes
-        self.feature_dict = nn.ParameterDict({
-            ntype: nn.Parameter(self.initializer(torch.empty(g.num_nodes(ntype), in_size))) for ntype in g.ntypes
-        })
+        # embeddings for different types of nodes
+        self.feature_dict = nn.ParameterDict(
+            {
+                ntype: nn.Parameter(
+                    self.initializer(torch.empty(g.num_nodes(ntype), in_size))
+                )
+                for ntype in g.ntypes
+            }
+        )
 
     def create_bpr_loss(self, users, pos_items, neg_items):
         pos_scores = (users * pos_items).sum(1)
@@ -88,17 +120,21 @@ def create_bpr_loss(self, users, pos_items, neg_items):
         mf_loss = nn.LogSigmoid()(pos_scores - neg_scores).mean()
         mf_loss = -1 * mf_loss
 
-        regularizer = (torch.norm(users) ** 2 + torch.norm(pos_items) ** 2 + torch.norm(neg_items) ** 2) / 2
+        regularizer = (
+            torch.norm(users) ** 2
+            + torch.norm(pos_items) ** 2
+            + torch.norm(neg_items) ** 2
+        ) / 2
         emb_loss = self.lmbd * regularizer / users.shape[0]
 
         return mf_loss + emb_loss, mf_loss, emb_loss
 
     def rating(self, u_g_embeddings, pos_i_g_embeddings):
         return torch.matmul(u_g_embeddings, pos_i_g_embeddings.t())
 
-    def forward(self, g,user_key, item_key, users, pos_items, neg_items):
-        h_dict = {ntype : self.feature_dict[ntype] for ntype in g.ntypes}
-        #obtain features of each layer and concatenate them all
+    def forward(self, g, user_key, item_key, users, pos_items, neg_items):
+        h_dict = {ntype: self.feature_dict[ntype] for ntype in g.ntypes}
+        # obtain features of each layer and concatenate them all
         user_embeds = []
         item_embeds = []
         user_embeds.append(h_dict[user_key])
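
As a usage note, the create_bpr_loss method reformatted above can be exercised standalone. A minimal sketch with random stand-in embeddings (the batch size of 4 and embedding width of 8 are illustrative assumptions, not values from the example's configs):

import torch
import torch.nn as nn

torch.manual_seed(0)
users = torch.randn(4, 8)
pos_items = torch.randn(4, 8)
neg_items = torch.randn(4, 8)

# Matrix-factorization term: push positive scores above negative ones.
pos_scores = (users * pos_items).sum(1)
neg_scores = (users * neg_items).sum(1)
mf_loss = -1 * nn.LogSigmoid()(pos_scores - neg_scores).mean()

# L2 regularizer on the batch embeddings, scaled as in the model (lmbd=1e-5).
lmbd = 1e-5
regularizer = (
    torch.norm(users) ** 2
    + torch.norm(pos_items) ** 2
    + torch.norm(neg_items) ** 2
) / 2
emb_loss = lmbd * regularizer / users.shape[0]

loss = mf_loss + emb_loss
print(loss.item(), mf_loss.item(), emb_loss.item())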