Refactor code in examples (PreferredAI#317)
tqtg authored Feb 7, 2020
1 parent 205292e commit b5bd8e3
Showing 28 changed files with 529 additions and 260 deletions.
32 changes: 20 additions & 12 deletions examples/biased_mf.py
@@ -18,27 +18,35 @@
 from cornac.datasets import movielens
 from cornac.eval_methods import RatioSplit
 
+
 # Load MovieLens 1M ratings
-ml_1m = movielens.load_feedback(variant='1M')
+ml_1m = movielens.load_feedback(variant="1M")
 
 # Define an evaluation method to split feedback into train and test sets
-ratio_split = RatioSplit(data=ml_1m,
-                         test_size=0.2,
-                         exclude_unknowns=False,
-                         verbose=True)
+ratio_split = RatioSplit(
+    data=ml_1m, test_size=0.2, exclude_unknowns=False, verbose=True
+)
 
 # Instantiate the global average baseline and MF model
 global_avg = cornac.models.GlobalAvg()
-mf = cornac.models.MF(k=10, max_iter=25, learning_rate=0.01, lambda_reg=0.02,
-                      use_bias=True, early_stop=True, verbose=True)
+mf = cornac.models.MF(
+    k=10,
+    max_iter=25,
+    learning_rate=0.01,
+    lambda_reg=0.02,
+    use_bias=True,
+    early_stop=True,
+    verbose=True,
+)
 
 # Instantiate MAE and RMSE for evaluation
 mae = cornac.metrics.MAE()
 rmse = cornac.metrics.RMSE()
 
 # Put everything together into an experiment and run it
-exp = cornac.Experiment(eval_method=ratio_split,
-                        models=[global_avg, mf],
-                        metrics=[mae, rmse],
-                        user_based=True)
-exp.run()
+cornac.Experiment(
+    eval_method=ratio_split,
+    models=[global_avg, mf],
+    metrics=[mae, rmse],
+    user_based=True,
+).run()
2 changes: 1 addition & 1 deletion examples/bpr_netflix.py
@@ -32,7 +32,7 @@
     verbose=True,
 )
 
-# Instantiate the most popular baseline and BPR model
+# Instantiate the most popular baseline, BPR, and WBPR models
 most_pop = cornac.models.MostPop()
 bpr = cornac.models.BPR(
     k=50, max_iter=200, learning_rate=0.001, lambda_reg=0.001, verbose=True
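Note: the updated comment now mentions a WBPR model, which is instantiated further down the file, outside the hunk shown above. A minimal, hypothetical sketch of what that instantiation plausibly looks like, assuming cornac.models.WBPR accepts the same constructor arguments as cornac.models.BPR; the values below are illustrative, not the file's literal code:

# Hypothetical sketch, not the literal code from examples/bpr_netflix.py
wbpr = cornac.models.WBPR(
    k=50, max_iter=200, learning_rate=0.001, lambda_reg=0.001, verbose=True
)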
24 changes: 13 additions & 11 deletions examples/c2pf_example.py
@@ -24,32 +24,34 @@
 from cornac.models import C2PF
 from cornac.datasets import amazon_office as office
 
+
 # In addition to user-item feedback, C2PF integrates item-to-item contextual relationships
 # The necessary data can be loaded as follows
 ratings = office.load_feedback()
 contexts = office.load_graph()
 
-# Instantiate a GraphModality, it make it convenient to work with graph (network) auxiliary information
+# Instantiate a GraphModality, it makes it convenient to work with graph (network) auxiliary information
 # For more details, please refer to the tutorial on how to work with auxiliary data
 item_graph_modality = GraphModality(data=contexts)
 
 # Define an evaluation method to split feedback into train and test sets
-ratio_split = RatioSplit(data=ratings,
-                         test_size=0.2, rating_threshold=3.5,
-                         exclude_unknowns=True, verbose=True,
-                         item_graph=item_graph_modality)
+ratio_split = RatioSplit(
+    data=ratings,
+    test_size=0.2,
+    rating_threshold=3.5,
+    exclude_unknowns=True,
+    verbose=True,
+    item_graph=item_graph_modality,
+)
 
 # Instantiate C2PF
-c2pf = C2PF(k=100, max_iter=80, variant='c2pf')
+c2pf = C2PF(k=100, max_iter=80, variant="c2pf")
 
 # Evaluation metrics
-nDgc = metrics.NDCG(k=-1)
+ndcg = metrics.NDCG(k=-1)
 mrr = metrics.MRR()
 rec = metrics.Recall(k=20)
 pre = metrics.Precision(k=20)
 
 # Put everything together into an experiment and run it
-exp = Experiment(eval_method=ratio_split,
-                 models=[c2pf],
-                 metrics=[nDgc, mrr, rec, pre])
-exp.run()
+Experiment(eval_method=ratio_split, models=[c2pf], metrics=[ndcg, mrr, rec, pre]).run()
42 changes: 29 additions & 13 deletions examples/cdl_example.py
@@ -21,30 +21,46 @@
 from cornac.data import TextModality
 from cornac.data.text import BaseTokenizer
 
+
 # CDL composes an autoencoder with matrix factorization to model item (article) texts and user-item preferences
 # The necessary data can be loaded as follows
 docs, item_ids = citeulike.load_text()
 feedback = citeulike.load_feedback(reader=Reader(item_set=item_ids))
 
-# Instantiate a TextModality, it make it convenient to work with text auxiliary information
+# Instantiate a TextModality, it makes it convenient to work with text auxiliary information
 # For more details, please refer to the tutorial on how to work with auxiliary data
-item_text_modality = TextModality(corpus=docs, ids=item_ids,
-                                  tokenizer=BaseTokenizer(stop_words='english'),
-                                  max_vocab=8000, max_doc_freq=0.5)
+item_text_modality = TextModality(
+    corpus=docs,
+    ids=item_ids,
+    tokenizer=BaseTokenizer(stop_words="english"),
+    max_vocab=8000,
+    max_doc_freq=0.5,
+)
 
 # Define an evaluation method to split feedback into train and test sets
-ratio_split = RatioSplit(data=feedback, test_size=0.2, exclude_unknowns=True,
-                         item_text=item_text_modality, verbose=True, seed=123, rating_threshold=0.5)
+ratio_split = RatioSplit(
+    data=feedback,
+    test_size=0.2,
+    exclude_unknowns=True,
+    item_text=item_text_modality,
+    verbose=True,
+    seed=123,
+    rating_threshold=0.5,
+)
 
-# Instantiate CDL
-cdl = cornac.models.CDL(k=50, autoencoder_structure=[200], max_iter=30,
-                        lambda_u=0.1, lambda_v=1, lambda_w=0.1, lambda_n=1000)
+# Instantiate CDL model
+cdl = cornac.models.CDL(
+    k=50,
+    autoencoder_structure=[200],
+    max_iter=30,
+    lambda_u=0.1,
+    lambda_v=1,
+    lambda_w=0.1,
+    lambda_n=1000,
+)
 
 # Use Recall@300 for evaluation
 rec_300 = cornac.metrics.Recall(k=300)
 
 # Put everything together into an experiment and run it
-exp = cornac.Experiment(eval_method=ratio_split,
-                        models=[cdl],
-                        metrics=[rec_300])
-exp.run()
+cornac.Experiment(eval_method=ratio_split, models=[cdl], metrics=[rec_300]).run()
46 changes: 32 additions & 14 deletions examples/cdr_example.py
@@ -21,31 +21,49 @@
 from cornac.data import TextModality
 from cornac.data.text import BaseTokenizer
 
+
 # CDR composes an autoencoder with a ranking collaborative model to represent item texts and user-item interactions
 # The necessary data can be loaded as follows
 docs, item_ids = citeulike.load_text()
 feedback = citeulike.load_feedback(reader=Reader(item_set=item_ids))
 
-# Instantiate a TextModality, it make it convenient to work with text auxiliary information
+# Instantiate a TextModality, it makes it convenient to work with text auxiliary information
 # For more details, please refer to the tutorial on how to work with auxiliary data
-item_text_modality = TextModality(corpus=docs, ids=item_ids,
-                                  tokenizer=BaseTokenizer(stop_words='english'),
-                                  max_vocab=8000, max_doc_freq=0.5)
+item_text_modality = TextModality(
+    corpus=docs,
+    ids=item_ids,
+    tokenizer=BaseTokenizer(stop_words="english"),
+    max_vocab=8000,
+    max_doc_freq=0.5,
+)
 
 # Define an evaluation method to split feedback into train and test sets
-ratio_split = RatioSplit(data=feedback, test_size=0.2, exclude_unknowns=True,
-                         item_text=item_text_modality, verbose=True, seed=123, rating_threshold=0.5)
+ratio_split = RatioSplit(
+    data=feedback,
+    test_size=0.2,
+    exclude_unknowns=True,
+    item_text=item_text_modality,
+    verbose=True,
+    seed=123,
+    rating_threshold=0.5,
+)
 
-# Instantiate CDR
-cdr = cornac.models.CDR(k=50, autoencoder_structure=[200], max_iter=100, batch_size=128,
-                        lambda_u=0.01, lambda_v=0.1, lambda_w=0.0001, lambda_n=5,
-                        learning_rate=0.001, vocab_size=8000)
+# Instantiate CDR model
+cdr = cornac.models.CDR(
+    k=50,
+    autoencoder_structure=[200],
+    max_iter=100,
+    batch_size=128,
+    lambda_u=0.01,
+    lambda_v=0.1,
+    lambda_w=0.0001,
+    lambda_n=5,
+    learning_rate=0.001,
+    vocab_size=8000,
+)
 
 # Use Recall@300 for evaluation
 rec_300 = cornac.metrics.Recall(k=300)
 
 # Put everything together into an experiment and run it
-exp = cornac.Experiment(eval_method=ratio_split,
-                        models=[cdr],
-                        metrics=[rec_300])
-exp.run()
+cornac.Experiment(eval_method=ratio_split, models=[cdr], metrics=[rec_300]).run()
35 changes: 22 additions & 13 deletions examples/conv_mf_example.py
@@ -21,30 +21,39 @@
 from cornac.data import TextModality
 from cornac.data.text import BaseTokenizer
 
+
 # ConvMF extends matrix factorization to leverage item textual information
 # The necessary data can be loaded as follows
 plots, movie_ids = movielens.load_plot()
-ml_1m = movielens.load_feedback(variant='1M', reader=Reader(item_set=movie_ids))
+ml_1m = movielens.load_feedback(variant="1M", reader=Reader(item_set=movie_ids))
 
-# Instantiate a TextModality, it make it convenient to work with text auxiliary information
+# Instantiate a TextModality, it makes it convenient to work with text auxiliary information
 # For more details, please refer to the tutorial on how to work with auxiliary data
-item_text_modality = TextModality(corpus=plots, ids=movie_ids,
-                                  tokenizer=BaseTokenizer(sep='\t', stop_words='english'),
-                                  max_vocab=8000, max_doc_freq=0.5)
+item_text_modality = TextModality(
+    corpus=plots,
+    ids=movie_ids,
+    tokenizer=BaseTokenizer(sep="\t", stop_words="english"),
+    max_vocab=8000,
+    max_doc_freq=0.5,
+)
 
 # Define an evaluation method to split feedback into train and test sets
-ratio_split = RatioSplit(data=ml_1m, test_size=0.2, exclude_unknowns=True,
-                         item_text=item_text_modality, verbose=True, seed=123)
+ratio_split = RatioSplit(
+    data=ml_1m,
+    test_size=0.2,
+    exclude_unknowns=True,
+    item_text=item_text_modality,
+    verbose=True,
+    seed=123,
+)
 
-# Instantiate ConvMF
+# Instantiate ConvMF model
 convmf = cornac.models.ConvMF(n_epochs=5, verbose=True, seed=123)
 
 # Instantiate RMSE for evaluation
 rmse = cornac.metrics.RMSE()
 
 # Put everything together into an experiment and run it
-exp = cornac.Experiment(eval_method=ratio_split,
-                        models=[convmf],
-                        metrics=[rmse],
-                        user_based=True)
-exp.run()
+cornac.Experiment(
+    eval_method=ratio_split, models=[convmf], metrics=[rmse], user_based=True
+).run()
31 changes: 20 additions & 11 deletions examples/ctr_example_citeulike.py
@@ -21,29 +21,38 @@
 from cornac.data import TextModality
 from cornac.data.text import BaseTokenizer
 
+
 # CTR composes the LDA topic model with matrix factorization to model item (article) texts and user-item preferences
 # The necessary data can be loaded as follows
 docs, item_ids = citeulike.load_text()
 feedback = citeulike.load_feedback(reader=Reader(item_set=item_ids))
 
-# Instantiate a TextModality, it make it convenient to work with text auxiliary information
+# Instantiate a TextModality, it makes it convenient to work with text auxiliary information
 # For more details, please refer to the tutorial on how to work with auxiliary data
-item_text_modality = TextModality(corpus=docs, ids=item_ids,
-                                  tokenizer=BaseTokenizer(sep=' ', stop_words='english'),
-                                  max_vocab=8000, max_doc_freq=0.5)
+item_text_modality = TextModality(
+    corpus=docs,
+    ids=item_ids,
+    tokenizer=BaseTokenizer(sep=" ", stop_words="english"),
+    max_vocab=8000,
+    max_doc_freq=0.5,
+)
 
 # Define an evaluation method to split feedback into train and test sets
-ratio_split = RatioSplit(data=feedback, test_size=0.2, exclude_unknowns=True,
-                         item_text=item_text_modality, verbose=True, seed=123, rating_threshold=0.5)
+ratio_split = RatioSplit(
+    data=feedback,
+    test_size=0.2,
+    exclude_unknowns=True,
+    item_text=item_text_modality,
+    verbose=True,
+    seed=123,
+    rating_threshold=0.5,
+)
 
-# Instantiate CTR
+# Instantiate CTR model
 ctr = cornac.models.CTR(k=50, max_iter=50, lambda_v=1)
 
 # Use Recall@300 for evaluation
 rec_300 = cornac.metrics.Recall(k=300)
 
 # Put everything together into an experiment and run it
-exp = cornac.Experiment(eval_method=ratio_split,
-                        models=[ctr],
-                        metrics=[rec_300])
-exp.run()
+cornac.Experiment(eval_method=ratio_split, models=[ctr], metrics=[rec_300]).run()
51 changes: 35 additions & 16 deletions examples/cvae_example.py
@@ -21,33 +21,52 @@
 from cornac.data import TextModality
 from cornac.data.text import BaseTokenizer
 
+
 # CVAE composes a variational autoencoder with matrix factorization to model item (article) texts and user-item preferences
 # The necessary data can be loaded as follows
 docs, item_ids = citeulike.load_text()
 feedback = citeulike.load_feedback(reader=Reader(item_set=item_ids))
 
-# Instantiate a TextModality, it make it convenient to work with text auxiliary information
+# Instantiate a TextModality, it makes it convenient to work with text auxiliary information
 # For more details, please refer to the tutorial on how to work with auxiliary data
-item_text_modality = TextModality(corpus=docs, ids=item_ids,
-                                  tokenizer=BaseTokenizer(stop_words='english'),
-                                  max_vocab=8000, max_doc_freq=0.5)
+item_text_modality = TextModality(
+    corpus=docs,
+    ids=item_ids,
+    tokenizer=BaseTokenizer(stop_words="english"),
+    max_vocab=8000,
+    max_doc_freq=0.5,
+)
 
 # Define an evaluation method to split feedback into train and test sets
-ratio_split = RatioSplit(data=feedback, test_size=0.2, exclude_unknowns=True,
-                         rating_threshold=0.5, verbose=True, seed=123,
-                         item_text=item_text_modality)
+ratio_split = RatioSplit(
+    data=feedback,
+    test_size=0.2,
+    exclude_unknowns=True,
+    rating_threshold=0.5,
+    verbose=True,
+    seed=123,
+    item_text=item_text_modality,
+)
 
-# Instantiate CVAE
-cvae = cornac.models.CVAE(z_dim=50, vae_layers=[200, 100], act_fn='sigmoid',
-                          input_dim=8000, lr=0.001, batch_size=128, n_epochs=100,
-                          lambda_u=1e-4, lambda_v=0.001, lambda_r=10, lambda_w=1e-4,
-                          seed=123, verbose=True)
+# Instantiate CVAE model
+cvae = cornac.models.CVAE(
+    z_dim=50,
+    vae_layers=[200, 100],
+    act_fn="sigmoid",
+    input_dim=8000,
+    lr=0.001,
+    batch_size=128,
+    n_epochs=100,
+    lambda_u=1e-4,
+    lambda_v=0.001,
+    lambda_r=10,
+    lambda_w=1e-4,
+    seed=123,
+    verbose=True,
+)
 
 # Use Recall@300 for evaluation
 rec_300 = cornac.metrics.Recall(k=300)
 
 # Put everything together into an experiment and run it
-exp = cornac.Experiment(eval_method=ratio_split,
-                        models=[cvae],
-                        metrics=[rec_300])
-exp.run()
+cornac.Experiment(eval_method=ratio_split, models=[cvae], metrics=[rec_300]).run()
… (diffs for the remaining 20 changed files not loaded)
