Refactor code in examples (PreferredAI#317)
tqtg authored Feb 7, 2020
1 parent 205292e commit b5bd8e3
Showing 28 changed files with 529 additions and 260 deletions.
32 changes: 20 additions & 12 deletions examples/biased_mf.py
@@ -18,27 +18,35 @@
 from cornac.datasets import movielens
 from cornac.eval_methods import RatioSplit
 
+
 # Load MovieLens 1M ratings
-ml_1m = movielens.load_feedback(variant='1M')
+ml_1m = movielens.load_feedback(variant="1M")
 
 # Define an evaluation method to split feedback into train and test sets
-ratio_split = RatioSplit(data=ml_1m,
-                         test_size=0.2,
-                         exclude_unknowns=False,
-                         verbose=True)
+ratio_split = RatioSplit(
+    data=ml_1m, test_size=0.2, exclude_unknowns=False, verbose=True
+)
 
 # Instantiate the global average baseline and MF model
 global_avg = cornac.models.GlobalAvg()
-mf = cornac.models.MF(k=10, max_iter=25, learning_rate=0.01, lambda_reg=0.02,
-                      use_bias=True, early_stop=True, verbose=True)
+mf = cornac.models.MF(
+    k=10,
+    max_iter=25,
+    learning_rate=0.01,
+    lambda_reg=0.02,
+    use_bias=True,
+    early_stop=True,
+    verbose=True,
+)
 
 # Instantiate MAE and RMSE for evaluation
 mae = cornac.metrics.MAE()
 rmse = cornac.metrics.RMSE()
 
 # Put everything together into an experiment and run it
-exp = cornac.Experiment(eval_method=ratio_split,
-                        models=[global_avg, mf],
-                        metrics=[mae, rmse],
-                        user_based=True)
-exp.run()
+cornac.Experiment(
+    eval_method=ratio_split,
+    models=[global_avg, mf],
+    metrics=[mae, rmse],
+    user_based=True,
+).run()
2 changes: 1 addition & 1 deletion examples/bpr_netflix.py
@@ -32,7 +32,7 @@
     verbose=True,
 )
 
-# Instantiate the most popular baseline and BPR model
+# Instantiate the most popular baseline, BPR, and WBPR models
 most_pop = cornac.models.MostPop()
 bpr = cornac.models.BPR(
     k=50, max_iter=200, learning_rate=0.001, lambda_reg=0.001, verbose=True
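Note: the updated comment now mentions a WBPR model, which is instantiated further down the file, outside the hunk shown above. A minimal, hypothetical sketch of what that instantiation plausibly looks like, assuming cornac.models.WBPR accepts the same constructor arguments as cornac.models.BPR; the values below are illustrative, not the file's literal code:

# Hypothetical sketch, not the literal code from examples/bpr_netflix.py
wbpr = cornac.models.WBPR(
    k=50, max_iter=200, learning_rate=0.001, lambda_reg=0.001, verbose=True
)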
24 changes: 13 additions & 11 deletions examples/c2pf_example.py
@@ -24,32 +24,34 @@
 from cornac.models import C2PF
 from cornac.datasets import amazon_office as office
 
+
 # In addition to user-item feedback, C2PF integrates item-to-item contextual relationships
 # The necessary data can be loaded as follows
 ratings = office.load_feedback()
 contexts = office.load_graph()
 
-# Instantiate a GraphModality, it make it convenient to work with graph (network) auxiliary information
+# Instantiate a GraphModality, it makes it convenient to work with graph (network) auxiliary information
 # For more details, please refer to the tutorial on how to work with auxiliary data
 item_graph_modality = GraphModality(data=contexts)
 
 # Define an evaluation method to split feedback into train and test sets
-ratio_split = RatioSplit(data=ratings,
-                         test_size=0.2, rating_threshold=3.5,
-                         exclude_unknowns=True, verbose=True,
-                         item_graph=item_graph_modality)
+ratio_split = RatioSplit(
+    data=ratings,
+    test_size=0.2,
+    rating_threshold=3.5,
+    exclude_unknowns=True,
+    verbose=True,
+    item_graph=item_graph_modality,
+)
 
 # Instantiate C2PF
-c2pf = C2PF(k=100, max_iter=80, variant='c2pf')
+c2pf = C2PF(k=100, max_iter=80, variant="c2pf")
 
 # Evaluation metrics
-nDgc = metrics.NDCG(k=-1)
+ndcg = metrics.NDCG(k=-1)
 mrr = metrics.MRR()
 rec = metrics.Recall(k=20)
 pre = metrics.Precision(k=20)
 
 # Put everything together into an experiment and run it
-exp = Experiment(eval_method=ratio_split,
-                 models=[c2pf],
-                 metrics=[nDgc, mrr, rec, pre])
-exp.run()
+Experiment(eval_method=ratio_split, models=[c2pf], metrics=[ndcg, mrr, rec, pre]).run()
42 changes: 29 additions & 13 deletions examples/cdl_example.py
@@ -21,30 +21,46 @@
 from cornac.data import TextModality
 from cornac.data.text import BaseTokenizer
 
+
 # CDL composes an autoencoder with matrix factorization to model item (article) texts and user-item preferences
 # The necessary data can be loaded as follows
 docs, item_ids = citeulike.load_text()
 feedback = citeulike.load_feedback(reader=Reader(item_set=item_ids))
 
-# Instantiate a TextModality, it make it convenient to work with text auxiliary information
+# Instantiate a TextModality, it makes it convenient to work with text auxiliary information
 # For more details, please refer to the tutorial on how to work with auxiliary data
-item_text_modality = TextModality(corpus=docs, ids=item_ids,
-                                  tokenizer=BaseTokenizer(stop_words='english'),
-                                  max_vocab=8000, max_doc_freq=0.5)
+item_text_modality = TextModality(
+    corpus=docs,
+    ids=item_ids,
+    tokenizer=BaseTokenizer(stop_words="english"),
+    max_vocab=8000,
+    max_doc_freq=0.5,
+)
 
 # Define an evaluation method to split feedback into train and test sets
-ratio_split = RatioSplit(data=feedback, test_size=0.2, exclude_unknowns=True,
-                         item_text=item_text_modality, verbose=True, seed=123, rating_threshold=0.5)
+ratio_split = RatioSplit(
+    data=feedback,
+    test_size=0.2,
+    exclude_unknowns=True,
+    item_text=item_text_modality,
+    verbose=True,
+    seed=123,
+    rating_threshold=0.5,
+)
 
-# Instantiate CDL
-cdl = cornac.models.CDL(k=50, autoencoder_structure=[200], max_iter=30,
-                        lambda_u=0.1, lambda_v=1, lambda_w=0.1, lambda_n=1000)
+# Instantiate CDL model
+cdl = cornac.models.CDL(
+    k=50,
+    autoencoder_structure=[200],
+    max_iter=30,
+    lambda_u=0.1,
+    lambda_v=1,
+    lambda_w=0.1,
+    lambda_n=1000,
+)
 
 # Use Recall@300 for evaluation
 rec_300 = cornac.metrics.Recall(k=300)
 
 # Put everything together into an experiment and run it
-exp = cornac.Experiment(eval_method=ratio_split,
-                        models=[cdl],
-                        metrics=[rec_300])
-exp.run()
+cornac.Experiment(eval_method=ratio_split, models=[cdl], metrics=[rec_300]).run()
46 changes: 32 additions & 14 deletions examples/cdr_example.py
@@ -21,31 +21,49 @@
 from cornac.data import TextModality
 from cornac.data.text import BaseTokenizer
 
+
 # CDR composes an autoencoder with a ranking collaborative model to represent item texts and user-item interactions
 # The necessary data can be loaded as follows
 docs, item_ids = citeulike.load_text()
 feedback = citeulike.load_feedback(reader=Reader(item_set=item_ids))
 
-# Instantiate a TextModality, it make it convenient to work with text auxiliary information
+# Instantiate a TextModality, it makes it convenient to work with text auxiliary information
 # For more details, please refer to the tutorial on how to work with auxiliary data
-item_text_modality = TextModality(corpus=docs, ids=item_ids,
-                                  tokenizer=BaseTokenizer(stop_words='english'),
-                                  max_vocab=8000, max_doc_freq=0.5)
+item_text_modality = TextModality(
+    corpus=docs,
+    ids=item_ids,
+    tokenizer=BaseTokenizer(stop_words="english"),
+    max_vocab=8000,
+    max_doc_freq=0.5,
+)
 
 # Define an evaluation method to split feedback into train and test sets
-ratio_split = RatioSplit(data=feedback, test_size=0.2, exclude_unknowns=True,
-                         item_text=item_text_modality, verbose=True, seed=123, rating_threshold=0.5)
+ratio_split = RatioSplit(
+    data=feedback,
+    test_size=0.2,
+    exclude_unknowns=True,
+    item_text=item_text_modality,
+    verbose=True,
+    seed=123,
+    rating_threshold=0.5,
+)
 
-# Instantiate CDR
-cdr = cornac.models.CDR(k=50, autoencoder_structure=[200], max_iter=100, batch_size=128,
-                        lambda_u=0.01, lambda_v=0.1, lambda_w=0.0001, lambda_n=5,
-                        learning_rate=0.001, vocab_size=8000)
+# Instantiate CDR model
+cdr = cornac.models.CDR(
+    k=50,
+    autoencoder_structure=[200],
+    max_iter=100,
+    batch_size=128,
+    lambda_u=0.01,
+    lambda_v=0.1,
+    lambda_w=0.0001,
+    lambda_n=5,
+    learning_rate=0.001,
+    vocab_size=8000,
+)
 
 # Use Recall@300 for evaluation
 rec_300 = cornac.metrics.Recall(k=300)
 
 # Put everything together into an experiment and run it
-exp = cornac.Experiment(eval_method=ratio_split,
-                        models=[cdr],
-                        metrics=[rec_300])
-exp.run()
+cornac.Experiment(eval_method=ratio_split, models=[cdr], metrics=[rec_300]).run()
35 changes: 22 additions & 13 deletions examples/conv_mf_example.py
@@ -21,30 +21,39 @@
 from cornac.data import TextModality
 from cornac.data.text import BaseTokenizer
 
+
 # ConvMF extends matrix factorization to leverage item textual information
 # The necessary data can be loaded as follows
 plots, movie_ids = movielens.load_plot()
-ml_1m = movielens.load_feedback(variant='1M', reader=Reader(item_set=movie_ids))
+ml_1m = movielens.load_feedback(variant="1M", reader=Reader(item_set=movie_ids))
 
-# Instantiate a TextModality, it make it convenient to work with text auxiliary information
+# Instantiate a TextModality, it makes it convenient to work with text auxiliary information
 # For more details, please refer to the tutorial on how to work with auxiliary data
-item_text_modality = TextModality(corpus=plots, ids=movie_ids,
-                                  tokenizer=BaseTokenizer(sep='\t', stop_words='english'),
-                                  max_vocab=8000, max_doc_freq=0.5)
+item_text_modality = TextModality(
+    corpus=plots,
+    ids=movie_ids,
+    tokenizer=BaseTokenizer(sep="\t", stop_words="english"),
+    max_vocab=8000,
+    max_doc_freq=0.5,
+)
 
 # Define an evaluation method to split feedback into train and test sets
-ratio_split = RatioSplit(data=ml_1m, test_size=0.2, exclude_unknowns=True,
-                         item_text=item_text_modality, verbose=True, seed=123)
+ratio_split = RatioSplit(
+    data=ml_1m,
+    test_size=0.2,
+    exclude_unknowns=True,
+    item_text=item_text_modality,
+    verbose=True,
+    seed=123,
+)
 
-# Instantiate ConvMF
+# Instantiate ConvMF model
 convmf = cornac.models.ConvMF(n_epochs=5, verbose=True, seed=123)
 
 # Instantiate RMSE for evaluation
 rmse = cornac.metrics.RMSE()
 
 # Put everything together into an experiment and run it
-exp = cornac.Experiment(eval_method=ratio_split,
-                        models=[convmf],
-                        metrics=[rmse],
-                        user_based=True)
-exp.run()
+cornac.Experiment(
+    eval_method=ratio_split, models=[convmf], metrics=[rmse], user_based=True
+).run()
31 changes: 20 additions & 11 deletions examples/ctr_example_citeulike.py
@@ -21,29 +21,38 @@
 from cornac.data import TextModality
 from cornac.data.text import BaseTokenizer
 
+
 # CTR composes the LDA topic model with matrix factorization to model item (article) texts and user-item preferences
 # The necessary data can be loaded as follows
 docs, item_ids = citeulike.load_text()
 feedback = citeulike.load_feedback(reader=Reader(item_set=item_ids))
 
-# Instantiate a TextModality, it make it convenient to work with text auxiliary information
+# Instantiate a TextModality, it makes it convenient to work with text auxiliary information
 # For more details, please refer to the tutorial on how to work with auxiliary data
-item_text_modality = TextModality(corpus=docs, ids=item_ids,
-                                  tokenizer=BaseTokenizer(sep=' ', stop_words='english'),
-                                  max_vocab=8000, max_doc_freq=0.5)
+item_text_modality = TextModality(
+    corpus=docs,
+    ids=item_ids,
+    tokenizer=BaseTokenizer(sep=" ", stop_words="english"),
+    max_vocab=8000,
+    max_doc_freq=0.5,
+)
 
 # Define an evaluation method to split feedback into train and test sets
-ratio_split = RatioSplit(data=feedback, test_size=0.2, exclude_unknowns=True,
-                         item_text=item_text_modality, verbose=True, seed=123, rating_threshold=0.5)
+ratio_split = RatioSplit(
+    data=feedback,
+    test_size=0.2,
+    exclude_unknowns=True,
+    item_text=item_text_modality,
+    verbose=True,
+    seed=123,
+    rating_threshold=0.5,
+)
 
-# Instantiate CTR
+# Instantiate CTR model
 ctr = cornac.models.CTR(k=50, max_iter=50, lambda_v=1)
 
 # Use Recall@300 for evaluation
 rec_300 = cornac.metrics.Recall(k=300)
 
 # Put everything together into an experiment and run it
-exp = cornac.Experiment(eval_method=ratio_split,
-                        models=[ctr],
-                        metrics=[rec_300])
-exp.run()
+cornac.Experiment(eval_method=ratio_split, models=[ctr], metrics=[rec_300]).run()
51 changes: 35 additions & 16 deletions examples/cvae_example.py
@@ -21,33 +21,52 @@
 from cornac.data import TextModality
 from cornac.data.text import BaseTokenizer
 
+
 # CVAE composes a variational autoencoder with matrix factorization to model item (article) texts and user-item preferences
 # The necessary data can be loaded as follows
 docs, item_ids = citeulike.load_text()
 feedback = citeulike.load_feedback(reader=Reader(item_set=item_ids))
 
-# Instantiate a TextModality, it make it convenient to work with text auxiliary information
+# Instantiate a TextModality, it makes it convenient to work with text auxiliary information
 # For more details, please refer to the tutorial on how to work with auxiliary data
-item_text_modality = TextModality(corpus=docs, ids=item_ids,
-                                  tokenizer=BaseTokenizer(stop_words='english'),
-                                  max_vocab=8000, max_doc_freq=0.5)
+item_text_modality = TextModality(
+    corpus=docs,
+    ids=item_ids,
+    tokenizer=BaseTokenizer(stop_words="english"),
+    max_vocab=8000,
+    max_doc_freq=0.5,
+)
 
 # Define an evaluation method to split feedback into train and test sets
-ratio_split = RatioSplit(data=feedback, test_size=0.2, exclude_unknowns=True,
-                         rating_threshold=0.5, verbose=True, seed=123,
-                         item_text=item_text_modality)
+ratio_split = RatioSplit(
+    data=feedback,
+    test_size=0.2,
+    exclude_unknowns=True,
+    rating_threshold=0.5,
+    verbose=True,
+    seed=123,
+    item_text=item_text_modality,
+)
 
-# Instantiate CVAE
-cvae = cornac.models.CVAE(z_dim=50, vae_layers=[200, 100], act_fn='sigmoid',
-                          input_dim=8000, lr=0.001, batch_size=128, n_epochs=100,
-                          lambda_u=1e-4, lambda_v=0.001, lambda_r=10, lambda_w=1e-4,
-                          seed=123, verbose=True)
+# Instantiate CVAE model
+cvae = cornac.models.CVAE(
+    z_dim=50,
+    vae_layers=[200, 100],
+    act_fn="sigmoid",
+    input_dim=8000,
+    lr=0.001,
+    batch_size=128,
+    n_epochs=100,
+    lambda_u=1e-4,
+    lambda_v=0.001,
+    lambda_r=10,
+    lambda_w=1e-4,
+    seed=123,
+    verbose=True,
+)
 
 # Use Recall@300 for evaluation
 rec_300 = cornac.metrics.Recall(k=300)
 
 # Put everything together into an experiment and run it
-exp = cornac.Experiment(eval_method=ratio_split,
-                        models=[cvae],
-                        metrics=[rec_300])
-exp.run()
+cornac.Experiment(eval_method=ratio_split, models=[cvae], metrics=[rec_300]).run()
… (diffs for the remaining 20 changed files not loaded)
