forked from hyllll/TRD
Commit
Showing 59 changed files with 85,561 additions and 0 deletions.
@@ -0,0 +1,83 @@
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
import scipy.sparse as sp

import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn


class AE(nn.Module):
    def __init__(self, user_num, item_num, hidden_neuron, epochs, lr, lamda, gpuid='0'):
        super(AE, self).__init__()

        self.user_num = user_num
        self.item_num = item_num

        self.lr = lr
        self.lamda = lamda
        self.epochs = epochs

        os.environ['CUDA_VISIBLE_DEVICES'] = gpuid
        cudnn.benchmark = True

        # one-hidden-layer autoencoder over each user's item-rating vector
        self.Encoder = nn.Linear(item_num, hidden_neuron)
        self.Decoder = nn.Linear(hidden_neuron, item_num)
        self.activation = nn.Sigmoid()

        nn.init.normal_(self.Encoder.weight, std=0.03)
        nn.init.normal_(self.Decoder.weight, std=0.03)

    def forward(self, x):
        x = self.activation(self.Encoder(x))
        x = self.Decoder(x)

        return x

    def fit(self, train_loader, r_all):
        if torch.cuda.is_available():
            self.cuda()
        else:
            self.cpu()

        criterion = nn.MSELoss(reduction='sum')
        optimizer = optim.Adam(self.parameters(), lr=self.lr)

        for epoch in range(1, self.epochs + 1):
            self.train()

            # set progress bar display
            pbar = tqdm(train_loader)
            pbar.set_description(f'[Epoch {epoch:03d}]')
            for ser, mask_ser in pbar:
                if torch.cuda.is_available():
                    ser = ser.cuda()
                    mask_ser = mask_ser.cuda()
                else:
                    ser = ser.cpu()
                    mask_ser = mask_ser.cpu()

                self.zero_grad()

                ser = ser.float()
                mask_ser = mask_ser.float()

                pred = self.forward(ser)

                # squared error on observed entries only, plus weight-norm regularization
                loss = criterion(pred * mask_ser, ser * mask_ser)
                loss += self.lamda * (self.Encoder.weight.norm() + self.Decoder.weight.norm())
                loss.backward()
                optimizer.step()

                pbar.set_postfix(loss=loss.item())
            self.eval()

        # score the full matrix once and cache it; the input must live on the same
        # device as the model, and the result must come back to CPU before numpy
        device = next(self.parameters()).device
        with torch.no_grad():
            self.prediction = self.forward(torch.tensor(r_all, dtype=torch.float32, device=device))
        self.prediction.clamp_(min=0, max=5)
        self.prediction = self.prediction.cpu().numpy()

    def predict(self, u, i):
        return self.prediction[u, i]
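For reference, here is a minimal sketch of how the AE class above might be driven. This is not part of the commit: the toy rating matrix and the DataLoader yielding (rating_row, mask_row) pairs are assumptions inferred from fit's signature, where the mask marks observed entries.

import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset

# hypothetical toy data: a dense user-item rating matrix, 0 = unobserved
r_all = np.zeros((100, 50), dtype=np.float32)
rows = np.random.randint(0, 100, 500)
cols = np.random.randint(0, 50, 500)
r_all[rows, cols] = np.random.randint(1, 6, 500)
mask = (r_all > 0).astype(np.float32)  # 1 where a rating is observed

# each batch element is one user's (rating vector, observation mask)
loader = DataLoader(TensorDataset(torch.from_numpy(r_all), torch.from_numpy(mask)),
                    batch_size=32, shuffle=True)

model = AE(user_num=100, item_num=50, hidden_neuron=64, epochs=5, lr=1e-3, lamda=0.01)
model.fit(loader, r_all)
print(model.predict(0, 0))  # cached reconstructed score for user 0, item 0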
@@ -0,0 +1,174 @@
import numpy as np
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim

import collections


class Bundler(nn.Module):
    def forward(self, data):
        raise NotImplementedError

    def forward_i(self, data):
        raise NotImplementedError

    def forward_o(self, data):
        raise NotImplementedError


class ItemEmb(Bundler):
    def __init__(self, item_num=20000, factors=100, padding_idx=0):
        super(ItemEmb, self).__init__()
        self.item_num = item_num
        self.factors = factors
        # separate input ("center") and output ("context") embedding tables, as in word2vec
        self.ivectors = nn.Embedding(self.item_num, self.factors, padding_idx=padding_idx)
        self.ovectors = nn.Embedding(self.item_num, self.factors, padding_idx=padding_idx)
        # row 0 stays zero for the padding index; the rest is uniform in +-0.5/factors
        self.ivectors.weight = nn.Parameter(torch.cat([
            torch.zeros(1, self.factors),
            torch.FloatTensor(self.item_num - 1, self.factors).uniform_(
                -0.5 / self.factors, 0.5 / self.factors)]))
        self.ovectors.weight = nn.Parameter(torch.cat([
            torch.zeros(1, self.factors),
            torch.FloatTensor(self.item_num - 1, self.factors).uniform_(
                -0.5 / self.factors, 0.5 / self.factors)]))
        self.ivectors.weight.requires_grad = True
        self.ovectors.weight.requires_grad = True

    def forward(self, data):
        return self.forward_i(data)

    def forward_i(self, data):
        v = torch.LongTensor(data)
        v = v.cuda() if self.ivectors.weight.is_cuda else v
        return self.ivectors(v)

    def forward_o(self, data):
        v = torch.LongTensor(data)
        v = v.cuda() if self.ovectors.weight.is_cuda else v
        return self.ovectors(v)


class Item2Vec(nn.Module):
    def __init__(self,
                 item2idx,
                 dataset='ml-100k',
                 item_num=20000,
                 factors=100,
                 epochs=20,
                 n_negs=20,
                 padding_idx=0,
                 weights=None,
                 use_cuda=False,
                 early_stop=True):
        """
        Item2Vec Recommender Class with SGNS
        Parameters
        ----------
        item2idx : dict, {item : index code} relation mapping
        item_num : int, total item number
        factors : int, latent factor number
        epochs : int, training epoch number
        n_negs : int, number of negative samples
        padding_idx : int, pads the output with the embedding vector at padding_idx (initialized to zeros)
        weights : weight value to use for negative sampling
        use_cuda : bool, whether to use CUDA environment
        early_stop : bool, whether to activate early stop mechanism
        """
        super(Item2Vec, self).__init__()
        self.dataset = dataset
        self.item2idx = item2idx
        self.embedding = ItemEmb(item_num, factors, padding_idx)
        self.item_num = item_num
        self.factors = factors
        self.epochs = epochs
        self.n_negs = n_negs
        self.weights = None
        if weights is not None:
            # smooth the sampling distribution with the word2vec 3/4 power trick
            wf = np.power(weights, 0.75)
            wf = wf / wf.sum()
            self.weights = torch.FloatTensor(wf)

        self.use_cuda = use_cuda
        self.early_stop = early_stop

        self.user_vec_dict = collections.OrderedDict()
        self.item2vec = collections.OrderedDict()

    def forward(self, iitem, oitems):
        batch_size = iitem.size()[0]
        context_size = oitems.size()[1]
        # draw negative items, either from the smoothed frequency table or uniformly
        if self.weights is not None:
            nitems = torch.multinomial(self.weights, batch_size * context_size * self.n_negs,
                                       replacement=True).view(batch_size, -1)
        else:
            nitems = torch.FloatTensor(batch_size,
                                       context_size * self.n_negs).uniform_(0, self.item_num - 1).long()
        ivectors = self.embedding.forward_i(iitem).unsqueeze(2)
        ovectors = self.embedding.forward_o(oitems)
        nvectors = self.embedding.forward_o(nitems).neg()
        # SGNS objective: maximize log sigmoid(o . i) over context items
        # and log sigmoid(-n . i) over sampled negatives
        oloss = torch.bmm(ovectors, ivectors).squeeze().sigmoid().log().mean(1)
        nloss = torch.bmm(nvectors, ivectors).squeeze().sigmoid().log().view(-1, context_size,
                                                                             self.n_negs).sum(2).mean(1)
        return -(oloss + nloss).mean()

    def fit(self, train_loader):
        item2idx = self.item2idx
        if self.use_cuda and torch.cuda.is_available():
            self.cuda()
        else:
            self.cpu()

        optimizer = optim.Adam(self.parameters())
        last_loss = 0.
        for epoch in range(1, self.epochs + 1):
            current_loss = 0.
            # set progress bar display
            pbar = tqdm(train_loader)
            pbar.set_description(f'[Epoch {epoch}]')
            for iitem, oitems in pbar:
                loss = self.forward(iitem, oitems)

                if torch.isnan(loss):
                    raise ValueError('Loss=NaN or Infinity: current settings do not fit the recommender')

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                pbar.set_postfix(loss=loss.item())
                current_loss += loss.item()

            # stop early once the epoch loss stops changing
            delta_loss = float(current_loss - last_loss)
            if (abs(delta_loss) < 1e-5) and self.early_stop:
                print('Satisfy early stop mechanism')
                break
            else:
                last_loss = current_loss

        idx2vec = self.embedding.ivectors.weight.data.cpu().numpy()

        self.item2vec = {k: idx2vec[v, :] for k, v in item2idx.items()}

    def build_user_vec(self, ur):
        # a user's vector is the mean of the vectors of the items that user interacted with
        for u in ur.keys():
            self.user_vec_dict[u] = np.array([self.item2vec[i] for i in ur[u]]).mean(axis=0)

    def predict(self, u, i):
        if u in self.user_vec_dict.keys():
            user_vec = self.user_vec_dict[u]
        else:
            return 0.
        item_vec = self.item2vec[i]

        return self._cos_sim(user_vec, item_vec)

    def _cos_sim(self, a, b):
        numerator = np.multiply(a, b).sum()
        denominator = np.linalg.norm(a) * np.linalg.norm(b)
        if denominator == 0:
            return 0
        else:
            return numerator / denominator
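Similarly, a minimal sketch of how Item2Vec might be exercised end to end. The toy (center item, context items) pairs, the item2idx mapping, and the user histories are all assumptions, not part of the commit; fit expects batches of (iitem, oitems) index tensors per forward's signature, with index 0 reserved for padding.

import torch
from torch.utils.data import DataLoader, TensorDataset

# hypothetical mapping from raw item ids to embedding rows (row 0 is the padding index)
item2idx = {i: i + 1 for i in range(100)}

# hypothetical training pairs: each center item with a window of 4 context items
iitems = torch.randint(1, 101, (1000,))     # center item indices
oitems = torch.randint(1, 101, (1000, 4))   # 4 context item indices per center
loader = DataLoader(TensorDataset(iitems, oitems), batch_size=64, shuffle=True)

model = Item2Vec(item2idx, item_num=101, factors=32, epochs=3, n_negs=5)
model.fit(loader)

# user profiles are mean item vectors over each user's interaction history
ur = {0: [1, 2, 3], 1: [4, 5]}
model.build_user_vec(ur)
print(model.predict(0, 2))  # cosine similarity between user 0's profile and item 2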