forked from PreferredAI/cornac
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add TriRank model * Add max_iter in online training
- Loading branch information
Showing
7 changed files
with
395 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from .recom_trirank import TriRank |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,332 @@ | ||
# Copyright 2018 The Cornac Authors. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================ | ||
|
||
import numpy as np | ||
from scipy.sparse import csr_matrix | ||
from tqdm.auto import tqdm | ||
|
||
from ..recommender import Recommender | ||
from ...utils import get_rng | ||
from ...utils.init_utils import uniform | ||
from ...exception import ScoreException | ||
|
||
|
||
# Small constant added to the normalizing denominators of the TriRank update
# rules so they never divide by zero when all weights are set to 0.
EPS = 1e-10
|
||
|
||
class TriRank(Recommender):
    """TriRank: Review-aware Explainable Recommendation by Modeling Aspects.

    Parameters
    ----------
    name: string, optional, default: 'TriRank'
        The name of the recommender model.

    alpha: float, optional, default: 1
        The weight of smoothness on user-item relation

    beta: float, optional, default: 1
        The weight of smoothness on item-aspect relation

    gamma: float, optional, default: 1
        The weight of smoothness on user-aspect relation

    eta_U: float, optional, default: 1
        The weight of fitting constraint on users

    eta_P: float, optional, default: 1
        The weight of fitting constraint on items

    eta_A: float, optional, default: 1
        The weight of fitting constraint on aspects

    max_iter: int, optional, default: 100
        Maximum number of iterations of the online ranking procedure.
        If set to a non-positive value (e.g., `max_iter=-1`), the procedure
        only stops when the rank vectors have converged.

    verbose: boolean, optional, default: True
        When True, running logs are displayed.

    init_params: dictionary, optional, default: None
        Initial parameters, e.g.,
        init_params = {'R':R, 'X':X, 'Y':Y, 'p':p, 'a':a, 'u':u}

        R: csr_matrix, shape (n_users, n_items)
            The symmetrically normalized edge weight matrix of the user-item
            relation, optional initialization via init_params
        X: csr_matrix, shape (n_items, n_aspects)
            The symmetrically normalized edge weight matrix of the item-aspect
            relation, optional initialization via init_params
        Y: csr_matrix, shape (n_users, n_aspects)
            The symmetrically normalized edge weight matrix of the user-aspect
            relation, optional initialization via init_params
        p: ndarray, shape (n_items,)
            Initialized item weights, optional initialization via init_params
        a: ndarray, shape (n_aspects,)
            Initialized aspect weights, optional initialization via init_params
        u: ndarray, shape (n_users,)
            Initialized user weights, optional initialization via init_params

    seed: int, optional, default: None
        Random seed for parameters initialization.

    trainable: boolean, optional, default: True
        When False, the model is not trained and Cornac assumes that the model
        is already pre-trained (R, X, Y, p, a, u are not None).

    References
    ----------
    He, Xiangnan, Tao Chen, Min-Yen Kan, and Xiao Chen. 2015.
    TriRank: Review-aware Explainable Recommendation by Modeling Aspects.
    In Proceedings of the 24th ACM International on Conference on Information
    and Knowledge Management (CIKM '15).
    ACM, New York, NY, USA, 1661-1670. DOI: https://doi.org/10.1145/2806416.2806504
    """

    def __init__(
        self,
        name="TriRank",
        alpha=1,
        beta=1,
        gamma=1,
        eta_U=1,
        eta_P=1,
        eta_A=1,
        max_iter=100,
        verbose=True,
        init_params=None,
        seed=None,
        trainable=True,
    ):
        super().__init__(name)
        # Assigned after the base initializer so the values given here win
        # regardless of the defaults the base class applies.
        self.trainable = trainable
        self.verbose = verbose

        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.eta_U = eta_U
        self.eta_P = eta_P
        self.eta_A = eta_A
        self.max_iter = max_iter
        self.seed = seed
        self.rng = get_rng(seed)

        # Init params if provided
        self.init_params = {} if init_params is None else init_params
        self.R = self.init_params.get("R", None)
        self.X = self.init_params.get("X", None)
        self.Y = self.init_params.get("Y", None)
        self.p = self.init_params.get("p", None)
        self.a = self.init_params.get("a", None)
        self.u = self.init_params.get("u", None)

    def _init(self):
        """Randomly initialize the item, aspect and user rank vectors.

        Vectors supplied through ``init_params`` are kept untouched. The draw
        order (p, a, u) is fixed so results are reproducible for a given seed.
        """
        if self.p is None:
            self.p = uniform(self.train_set.num_items, random_state=self.rng)
        if self.a is None:
            self.a = uniform(
                self.train_set.sentiment.num_aspects, random_state=self.rng
            )
        if self.u is None:
            self.u = uniform(self.train_set.num_users, random_state=self.rng)

    @staticmethod
    def _l1_normalized(vector):
        """Return ``vector`` scaled to unit L1 norm; all-zero input is returned as is."""
        total = np.linalg.norm(vector, 1)
        return vector / total if total > 0 else vector

    def _symmetrical_normalization(self, matrix: csr_matrix):
        """Return the symmetrically normalized version of a sparse matrix.

        Every non-zero entry ``m[i, j]`` is divided by
        ``sqrt(row_sum[i]) * sqrt(col_sum[j])``.

        Parameters
        ----------
        matrix: csr_matrix, shape (n_rows, n_cols)
            Edge weight matrix to normalize. It is not modified.

        Returns
        -------
        csr_matrix with the same shape as ``matrix``.
        """
        row_norm = np.sqrt(np.asarray(matrix.sum(axis=1)).ravel())
        col_norm = np.sqrt(np.asarray(matrix.sum(axis=0)).ravel())

        # Work on the coordinate representation so the whole normalization is
        # one vectorized division instead of a per-element sparse lookup.
        coo = matrix.tocoo()
        stored = coo.data != 0  # ignore explicitly stored zeros
        rows, cols = coo.row[stored], coo.col[stored]
        data = coo.data[stored] / (row_norm[rows] * col_norm[cols])

        return csr_matrix((data, (rows, cols)), shape=matrix.shape)

    def _create_matrices(self, train_set):
        """Offline training: build the normalized matrices R, X and Y.

        X (items x aspects) and Y (users x aspects) count, per review, which
        aspects are mentioned; the counts are log-scaled and all three
        matrices are symmetrically normalized and stored on the model.
        """
        from time import time

        start_time = time()
        if self.verbose:
            print("Building matrices started!")
        sentiment_modality = train_set.sentiment
        n_users = train_set.num_users
        n_items = train_set.num_items
        n_aspects = sentiment_modality.num_aspects

        X_row, X_col = [], []
        Y_row, Y_col = [], []
        for uid, isid in tqdm(
            sentiment_modality.user_sentiment.items(),
            disable=not self.verbose,
            desc="Building matrices",
        ):
            for iid, sid in isid.items():
                aos = sentiment_modality.sentiment[sid]
                # An aspect counts at most once per review/sid.
                aids = {aid for aid, _, _ in aos}
                for aid in aids:
                    X_row.append(iid)
                    X_col.append(aid)
                    Y_row.append(uid)
                    Y_col.append(aid)

        # Algorithm 1: Offline training line 2
        # Duplicate coordinates are summed by the constructor, which turns the
        # per-review ones into mention counts.
        X = csr_matrix(
            (np.ones(len(X_row)), (X_row, X_col)), shape=(n_items, n_aspects)
        )
        Y = csr_matrix(
            (np.ones(len(Y_row)), (Y_row, Y_col)), shape=(n_users, n_aspects)
        )

        # Algorithm 1: Offline training line 3
        X.data = np.log2(X.data) + 1
        Y.data = np.log2(Y.data) + 1

        # Algorithm 1: Offline training line 4
        if self.verbose:
            print("Building symmetric normalized matrices R, X, Y")
        self.R = self._symmetrical_normalization(train_set.csr_matrix)
        self.X = self._symmetrical_normalization(X)
        self.Y = self._symmetrical_normalization(Y)

        if self.verbose:
            total_time = time() - start_time
            print("Building matrices completed in %d s" % total_time)

    def fit(self, train_set, val_set=None):
        """Fit the model to observations.

        Parameters
        ----------
        train_set: :obj:`cornac.data.Dataset`, required
            User-Item preference data as well as additional modalities.

        val_set: :obj:`cornac.data.Dataset`, optional, default: None
            User-Item preference data for model selection purposes (e.g., early stopping).

        Returns
        -------
        self : object
        """
        Recommender.fit(self, train_set, val_set)
        self._init()

        if not self.trainable:
            return self

        # Offline training: Build item-aspect matrix X and user-aspect matrix Y
        self._create_matrices(train_set)
        return self

    def _online_recommendation(self, user):
        """Run the online ranking procedure (Algorithm 1) for one user.

        Parameters
        ----------
        user: int
            Index of the target user.

        Returns
        -------
        (p, a, u): tuple of ndarray
            Rank vectors over items, aspects and users for the target user.
        """
        train_matrix = self.train_set.csr_matrix

        # Algorithm 1: Online recommendation line 5
        # Fancy indexing returns a copy, so filling its data with ones does not
        # touch the training matrix.
        p_0 = train_matrix[[user]]
        p_0.data.fill(1)
        p_0 = p_0.toarray().ravel()
        a_0 = self.Y[user].toarray().ravel()
        u_0 = np.zeros(train_matrix.shape[0])
        u_0[user] = 1

        # Algorithm 1: Online recommendation line 6
        p_0 = self._l1_normalized(p_0)
        a_0 = self._l1_normalized(a_0)
        u_0 = self._l1_normalized(u_0)

        # Algorithm 1: Online recommendation line 7
        p = self.p.copy()
        a = self.a.copy()
        u = self.u.copy()

        # The weights and transposes are loop invariant; computing them once
        # also avoids rescaling whole sparse matrices on every iteration.
        u_denominator = self.alpha + self.gamma + self.eta_U + EPS
        p_denominator = self.alpha + self.beta + self.eta_P + EPS
        a_denominator = self.gamma + self.beta + self.eta_A + EPS
        R, X, Y = self.R, self.X, self.Y
        R_T, X_T, Y_T = R.T, X.T, Y.T

        # Algorithm 1: Online recommendation line 8
        prev_p, prev_a, prev_u = p, a, u
        n_iter = 0
        while True:
            # eq. 4
            u = np.asarray(
                self.alpha / u_denominator * (R @ p)
                + self.gamma / u_denominator * (Y @ a)
                + self.eta_U / u_denominator * u_0
            ).ravel()
            p = np.asarray(
                self.alpha / p_denominator * (R_T @ u)
                + self.beta / p_denominator * (X @ a)
                + self.eta_P / p_denominator * p_0
            ).ravel()
            a = np.asarray(
                self.gamma / a_denominator * (Y_T @ u)
                + self.beta / a_denominator * (X_T @ p)
                + self.eta_A / a_denominator * a_0
            ).ravel()
            n_iter += 1

            converged = (
                np.allclose(u, prev_u)
                and np.allclose(p, prev_p)
                and np.allclose(a, prev_a)
            )
            # A non-positive max_iter disables the cap: stop on convergence only.
            if converged or (self.max_iter > 0 and n_iter >= self.max_iter):
                break
            prev_p, prev_a, prev_u = p, a, u

        # Algorithm 1: Online recommendation line 9
        return p, a, u

    def score(self, u_idx, i_idx=None):
        """Predict the scores/ratings of a user for an item.

        Parameters
        ----------
        u_idx: int, required
            The index of the user for whom to perform score prediction.

        i_idx: int, optional, default: None
            The index of the item for which to perform score prediction.
            If None, scores for all known items will be returned.

        Returns
        -------
        res : A scalar or a Numpy array
            Relative scores that the user gives to the item or to all known items

        Raises
        ------
        ScoreException
            If the user or the item is unknown to the training set.
        """
        if self.train_set.is_unk_user(u_idx):
            raise ScoreException(
                "Can't make score prediction for (user_id=%d)" % u_idx
            )
        if i_idx is not None and self.train_set.is_unk_item(i_idx):
            raise ScoreException(
                "Can't make score prediction for (item_id=%d)" % i_idx
            )

        item_scores, *_ = self._online_recommendation(u_idx)
        # Set already rated items to zero.
        item_scores[self.train_set.csr_matrix[u_idx].indices] = 0

        # Scale to match rating scale.
        min_rating = self.train_set.min_rating
        rating_range = self.train_set.max_rating - min_rating
        top_score = item_scores.max() if item_scores.size else 0
        if top_score > 0:
            item_scores = item_scores * rating_range / top_score + min_rating
        else:
            # Nothing ranks above zero: give every item the lowest rating
            # instead of dividing by zero and returning NaN.
            item_scores = np.full_like(item_scores, min_rating, dtype=float)

        if i_idx is None:
            return item_scores
        return item_scores[i_idx]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.