forked from zyang1580/PDA
Showing 59 changed files with 17,349 additions and 0 deletions.
@@ -0,0 +1,21 @@
from parse import parse_args
from load_data import Data, Data2
import multiprocessing
import heapq

args = parse_args()

if args.train == 's_condition' or args.train == 'sg_condition' or args.train == 'temp_pop' or args.train == 'us_condition':
    # PD/PDA/PDG/BPRMF(t)-pop
    data = Data2(args)
else:  # BPRMF
    data = Data(args)


# sorted_id, belong, rate, usersorted_id, userbelong, userrate = data.plot_pics()
Ks = eval(args.Ks)
BATCH_SIZE = args.batch_size
ITEM_NUM = data.n_items
USER_NUM = data.n_users

points = [10, 50, 100, 200, 500]
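Note that `Ks` is recovered from the `--Ks` string flag with `eval`. A minimal alternative sketch using `ast.literal_eval` (not part of this commit; it assumes the same '[20]'-style string format) would look like this:

import ast

def parse_ks(ks_string):
    # Accepts only Python literals such as '[20]' or '[20, 50]', unlike eval.
    ks = ast.literal_eval(ks_string)
    return [int(k) for k in ks]

print(parse_ks('[20]'))      # [20]
print(parse_ks('[20, 50]'))  # [20, 50]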
@@ -0,0 +1,117 @@
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description="Run pop_bias.")
    parser.add_argument('--data_path', nargs='?', default='./data/',  # change by zyang
                        help='Input data path.')
    parser.add_argument('--dataset', nargs='?', default='kwai',
                        help='Choose a dataset from {movielens_ml_1m, movielens_ml_10m, gowalla}')
    parser.add_argument('--source', nargs='?', default='normal',
                        help='...')  # not used
    parser.add_argument('--train', nargs='?', default='normal',
                        help='normal (MF) | s_condition (PD/PDA) | temp (BPRMF(t)-pop)')
    parser.add_argument('--test', nargs='?', default='normal',
                        help='normal (MF) | s_condition (PD/PDA) | temp (BPRMF(t)-pop)')
    parser.add_argument('--valid_set', nargs='?', default='test',
                        help='test | valid')
    parser.add_argument('--save_dir', nargs='?', default="/data/zyang/save_model/",
                        help='save path')

    parser.add_argument('--alpha', type=float, default=1e-3,  # not used
                        help='alpha')
    parser.add_argument('--beta', type=float, default=1e-3,  # not used
                        help='beta')

    parser.add_argument('--pc_alpha', type=float, default=0.1,  # not used
                        help='alpha')
    parser.add_argument('--pc_beta', type=float, default=0.1,  # not used
                        help='beta')

    parser.add_argument('--exp_init_values', type=float, default=0.1, help='power coff initial value')
    parser.add_argument('--pop_exp', type=float, default=0.1,
                        help='popularity power coff')  # gamma in paper
    parser.add_argument('--early_stop', type=int, default=1,
                        help='whether to use early stopping')
    parser.add_argument('--need_save', type=int, default=1,
                        help='0: do not save model, 1: saving')
    parser.add_argument('--cores', type=int, default=1,
                        help='cores for prefetch')

    parser.add_argument('--verbose', type=int, default=1,
                        help='Interval of evaluation.')
    parser.add_argument('--epoch', type=int, default=400,
                        help='Number of epoch.')
    parser.add_argument('--load_epoch', type=int, default=400,
                        help='Epoch which to load, for pretraining.')  # not used
    parser.add_argument('--embed_size', type=int, default=64,
                        help='Embedding size.')
    parser.add_argument('--batch_size', type=int, default=1024,
                        help='Batch size.')
    parser.add_argument('--Ks', nargs='?', default='[20]',
                        help='Evaluate on Ks optimal items.')
    parser.add_argument('--epochs', nargs='?', default='[]',
                        help='Test c on these epochs.')
    parser.add_argument('--regs', type=float, default=1e-5,
                        help='Regularizations.')
    parser.add_argument('--fregs', type=float, default=1e-5,
                        help='fine-tune Regularizations.')  # not used
    parser.add_argument('--c', type=float, default=10.0,
                        help='Constant c.')  # not used
    parser.add_argument('--train_c', type=str, default="val",
                        help='val | test')  # not used
    parser.add_argument('--lr', type=float, default=1e-3,
                        help='Learning rate.')
    parser.add_argument('--wd', type=float, default=1e-5,
                        help='Weight decay of optimizer.')  # not used
    parser.add_argument('--model', nargs='?', default='mf',
                        help='Specify model type, choose from {mf, CausalE}')
    parser.add_argument('--skew', type=int, default=0,
                        help='Use not skewed dataset.')  # not used
    parser.add_argument('--model_type', nargs='?', default='o',
                        help='Specify model type, choose from {o, c, ic, rc, irc}')  # not used
    parser.add_argument('--devide_ratio', type=float, default=0.8,
                        help='Train/Test.')  # not used
    parser.add_argument('--save_flag', type=int, default=1,
                        help='0: Disable model saver, 1: Activate model saver')

    parser.add_argument('--pop_used', type=int, default=-2,
                        help='pop_rate used in test')  # not used

    parser.add_argument('--cuda', type=str, default='1',
                        help='Available GPU ID')
    parser.add_argument('--pretrain', type=int, default=0,
                        help='0: no pretrain, 1: load pretrain model')  # not used
    parser.add_argument('--check_c', type=int, default=1,
                        help='0: no checking, 1: check a range of cs')  # not used
    parser.add_argument('--log_interval', type=int, default=10,
                        help='log\'s interval epoch while training')
    parser.add_argument('--pop_wd', type=float, default=0.,
                        help='weight decay of popularity')  # not used
    parser.add_argument('--base', type=float, default=-1.,
                        help='check range base.')  # not used
    parser.add_argument('--cf_pen', type=float, default=1.0,
                        help='Imbalance loss.')  # not used
    parser.add_argument('--saveID', nargs='?', default='',
                        help='Specify model save path.')
    parser.add_argument('--user_min', type=int, default=1,
                        help='user_min.')  # not used
    parser.add_argument('--user_max', type=int, default=1000,
                        help='user max per cls.')  # not used
    parser.add_argument('--data_type', nargs='?', default='ori',
                        help='load imbalanced data or not.')
    parser.add_argument('--imb_type', nargs='?', default='exp',
                        help='imbalance type.')  # not used
    parser.add_argument('--top_ratio', type=float, default=0.1,
                        help='imbalance top ratio.')  # not used
    parser.add_argument('--lam', type=float, default=1.,
                        help='lambda.')  # not used
    parser.add_argument('--check_epoch', nargs='?', default='all',
                        help='check all epochs or select some or search in range.')  # not used
    parser.add_argument('--start', type=float, default=-1.,
                        help='check c start.')  # not used
    parser.add_argument('--end', type=float, default=1.,
                        help='check c end.')  # not used
    parser.add_argument('--step', type=int, default=20,
                        help='check c step.')  # not used
    parser.add_argument('--out', type=int, default=0)  # not used
    return parser.parse_args()
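For reference, a hypothetical invocation sketch (not part of this commit; the script name and flag values are illustrative) showing how these arguments select the PD/PDA branch handled by `Data2` in the first file above:

import sys
from parse import parse_args

# Simulate a command line; in practice these flags are passed to the training script.
sys.argv = ['main.py',
            '--dataset', 'kwai',
            '--train', 's_condition',   # PD/PDA branch (loads Data2 in the snippet above)
            '--test', 's_condition',
            '--pop_exp', '0.1',         # gamma in the paper
            '--Ks', '[20]',
            '--batch_size', '1024']
args = parse_args()
print(args.train, args.pop_exp, args.Ks)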
@@ -0,0 +1,182 @@
from NeuRec.MF.load_data import Data
import numpy as np
import multiprocessing  # needed by multi_sampling below (missing in the original snippet)
from prefetch_generator import background


# `data` is a module-level Data instance used by the two functions below;
# its definition is not shown in this hunk.
@background(max_prefetch=3)
def multi_sampling():
    worker = 10
    pool = multiprocessing.Pool(worker)
    all_users = data.train_user_list.keys()
    sampled_data = pool.map(sampling_one_user, all_users)
    users = []
    pos_items = []
    neg_items = []
    for re in sampled_data:
        users.extend(re['user'])
        pos_items.extend(re['pos'])
        neg_items.extend(re['neg'])  # extend, not append, so the list stays flat like users/pos_items
    return users, pos_items, neg_items


def sampling_one_user(u):
    pos_items = data.train_user_list[u]
    N_ps = len(pos_items)
    neg_items = []
    n_items = data.n_items
    for i in range(N_ps):
        one_neg = np.random.randint(n_items)
        while one_neg in pos_items:
            one_neg = np.random.randint(n_items)
        neg_items.append(one_neg)
    users = [u] * N_ps
    return {'user': users, 'pos': pos_items, 'neg': neg_items}


def _batch_sampling(itr, pos_dict, neg_dict, tot_neg, batch_epoch, p_thre, item_max, neg_pro_dict):
    '''
    subprocess
    :param itr:
    :param pos_dict:
    :param neg_dict:
    :param tot_neg:
    :return:
    '''
    neg_items = np.zeros([itr.shape[0], tot_neg])
    expo_flag = np.zeros([itr.shape[0], tot_neg])
    p = np.random.rand(itr.shape[0], tot_neg)
    k = 0
    for x in itr:
        u = x[0]
        try:
            idx1 = np.where(p[k] <= p_thre)[0]
            idx2 = np.where(p[k] > p_thre)[0]
            neg_items[k, idx1] = np.random.choice(neg_dict[u], size=idx1.shape[0])
            expo_flag[k, idx1] += 1
            for idx2_i in idx2:
                temp = np.random.randint(item_max)
                while temp in pos_dict[u]:
                    temp = np.random.randint(item_max)
                neg_items[k, idx2_i] = temp
        except:
            idx2 = np.arange(tot_neg)
            for idx2_i in idx2:
                temp = np.random.randint(item_max)
                while temp in pos_dict[u]:
                    temp = np.random.randint(item_max)
                neg_items[k, idx2_i] = temp
        k += 1
    expo_flag = expo_flag.reshape(itr.shape[0], batch_epoch, -1)
    neg_items = neg_items.reshape(itr.shape[0], batch_epoch, -1)
    pos_flag = np.ones([expo_flag.shape[0], expo_flag.shape[1], 1])
    expo_flag = np.concatenate([pos_flag, expo_flag], axis=-1)
    return [itr, np.concatenate([neg_items, expo_flag], axis=-1)]


def _batch_sampling2(itr, pos_dict, neg_dict, tot_neg, batch_epoch, p_thre, item_max, neg_pro_dict):
    '''
    subprocess
    :param itr:
    :param pos_dict:
    :param neg_dict:
    :param tot_neg:
    :return:
    '''
    neg_items = np.zeros([itr.shape[0], tot_neg])
    expo_flag = np.zeros([itr.shape[0], tot_neg])
    p = np.random.rand(itr.shape[0], tot_neg)
    k = 0
    for x in itr:
        u = x[0]
        pos_item = pos_dict[u]
        try:
            neg_item_u = neg_dict[u]
            idx1 = np.where(p[k] <= p_thre)[0]
            idx2 = np.where(p[k] > p_thre)[0]
        except:
            idx1 = None
            idx2 = np.arange(tot_neg)
        if idx1 is not None and idx1.shape[0] > 0:
            neg_items[k, idx1] = np.random.choice(neg_item_u, size=idx1.shape[0])
            expo_flag[k, idx1] += 1
        if idx2.shape[0] > 0:
            l2 = idx2.shape[0]
            tmp = np.random.randint(item_max, size=l2 * 5)  # sampling 5 times items
            tmp = np.setdiff1d(tmp, pos_item, assume_unique=True)
            if tmp.shape[0] >= l2:  # sampling enough
                neg_items[k, idx2] = tmp[:l2]
            else:  # not enough
                tmp = np.random.randint(item_max, size=l2 * 10)  # sampling more
                tmp = np.setdiff1d(tmp, pos_item, assume_unique=True)
                l_t = min(tmp.shape[0], l2)
                idx2_t = idx2[:l_t]
                neg_items[k, idx2_t] = tmp[:l_t]  # saving not in pos
                for idx2_i in idx2[l_t:]:  # sampling others
                    temp = np.random.randint(item_max)
                    while temp in pos_item:
                        temp = np.random.randint(item_max)
                    neg_items[k, idx2_i] = temp
        k += 1
    expo_flag = expo_flag.reshape(itr.shape[0], batch_epoch, -1)
    neg_items = neg_items.reshape(itr.shape[0], batch_epoch, -1)
    pos_flag = np.ones([expo_flag.shape[0], expo_flag.shape[1], 1])
    expo_flag = np.concatenate([pos_flag, expo_flag], axis=-1)
    return [itr, np.concatenate([neg_items, expo_flag], axis=-1)]


def _batch_sampling3(itr, pos_dict, neg_dict, tot_neg, batch_epoch, p_thre, item_max, neg_pro_dict):
    '''
    subprocess; in the random-sampling stage, this version makes sure that the sampled items
    do not come from the negative interactions either.
    :param itr:
    :param pos_dict:
    :param neg_dict:
    :param tot_neg:
    :param neg_pro_dict: probability of sampling for items in the neg_dict
    :return:
    '''
    neg_items = np.zeros([itr.shape[0], tot_neg])
    expo_flag = np.zeros([itr.shape[0], tot_neg])
    p = np.random.rand(itr.shape[0], tot_neg)
    k = 0
    for x in itr:
        u = x[0]
        pos_item = pos_dict[u]
        try:
            neg_item_u = neg_dict[u]
            if neg_pro_dict is not None:
                neg_item_p = neg_pro_dict[u]
            else:
                neg_item_p = None
            idx1 = np.where(p[k] <= p_thre)[0]
            idx2 = np.where(p[k] > p_thre)[0]
        except:
            idx1 = None
            neg_item_u = None
            idx2 = np.arange(tot_neg)
        if idx1 is not None and idx1.shape[0] > 0:
            neg_items[k, idx1] = np.random.choice(neg_item_u, size=idx1.shape[0], p=neg_item_p)
            expo_flag[k, idx1] += 1
        if idx2.shape[0] > 0:
            l2 = idx2.shape[0]
            tmp = np.random.randint(0, item_max, size=l2 * 10)  # sampling 10 times as many candidates
            if neg_item_u is not None:
                itr_items = np.concatenate([pos_item, neg_item_u], axis=0)
            else:
                itr_items = pos_item
            tmp = np.setdiff1d(tmp, itr_items, assume_unique=True)
            if tmp.shape[0] >= l2:  # sampling enough
                neg_items[k, idx2] = tmp[:l2]
            else:  # not enough
                tmp = np.random.randint(0, item_max, size=l2 * 20)  # sampling more
                tmp = np.setdiff1d(tmp, itr_items, assume_unique=True)
                l_t = min(tmp.shape[0], l2)
                idx2_t = idx2[:l_t]
                neg_items[k, idx2_t] = tmp[:l_t]  # saving not in pos
                for idx2_i in idx2[l_t:]:  # sampling others
                    temp = np.random.randint(item_max)
                    while temp in itr_items:
                        temp = np.random.randint(item_max)
                    neg_items[k, idx2_i] = temp
        k += 1
    expo_flag = expo_flag.reshape(itr.shape[0], batch_epoch, -1)
    neg_items = neg_items.reshape(itr.shape[0], batch_epoch, -1)
    pos_flag = np.ones([expo_flag.shape[0], expo_flag.shape[1], 1])
    expo_flag = np.concatenate([pos_flag, expo_flag], axis=-1)
    return [itr, np.concatenate([neg_items, expo_flag], axis=-1)]
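A hypothetical driver sketch (not part of this commit; the toy dictionaries, shapes, and worker count are invented for illustration, and it assumes it is appended to the same module so `_batch_sampling3` is importable by the pool workers) showing how the function can be fed per-user positive/negative item sets through a multiprocessing pool:

import multiprocessing
from functools import partial

if __name__ == '__main__':
    item_max = 100
    pos_dict = {0: np.array([1, 2, 3]), 1: np.array([4, 5])}   # user -> positive items
    neg_dict = {0: np.array([10, 11]), 1: np.array([12])}      # user -> observed negative items
    itr = np.array([[0, 1], [0, 2], [1, 4]])                   # (user, pos_item) pairs
    batch_epoch, neg_per_pos = 2, 4
    tot_neg = batch_epoch * neg_per_pos                        # reshaped to [N, batch_epoch, -1] inside

    worker = partial(_batch_sampling3, pos_dict=pos_dict, neg_dict=neg_dict,
                     tot_neg=tot_neg, batch_epoch=batch_epoch, p_thre=0.5,
                     item_max=item_max, neg_pro_dict=None)
    with multiprocessing.Pool(2) as pool:
        results = pool.map(worker, np.array_split(itr, 2))
    for batch, sampled in results:
        # sampled packs the negative item ids and the exposure flags along the last axis
        print(batch.shape, sampled.shape)   # e.g. (2, 2) and (2, 2, 9)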