
Commit

init
zyang1580 committed May 20, 2021
1 parent c8b3c14 commit fbfa4e4
Showing 59 changed files with 17,349 additions and 0 deletions.
1,412 changes: 1,412 additions & 0 deletions MF/BPR_PC.py

Large diffs are not rendered by default.

21 changes: 21 additions & 0 deletions MF/batch_test.py
@@ -0,0 +1,21 @@
from parse import parse_args
from load_data import Data,Data2
import multiprocessing
import heapq

args = parse_args()

if args.train == 's_condition' or args.train == 'sg_condition' or args.train == 'temp_pop' or args.train == 'us_condition':
    # PD/PDA/PDG/BPRMF(t)-pop
    data = Data2(args)
else:  # BPRMF
    data = Data(args)


#sorted_id, belong, rate, usersorted_id, userbelong, userrate = data.plot_pics()
Ks = eval(args.Ks)
BATCH_SIZE = args.batch_size
ITEM_NUM = data.n_items
USER_NUM = data.n_users

points = [10, 50, 100, 200, 500]
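
batch_test.py stops here at the shared evaluation globals; the heapq import and the Ks list point to the usual top-K ranking harness. Below is a minimal sketch of that pattern, assuming a hypothetical rank_one_user helper (not part of this commit) that masks a user's training items and reports recall@K:

import heapq
import numpy as np

def rank_one_user(rating, train_items, test_items, Ks):
    # Hypothetical helper (illustration only): mask items seen in training,
    # keep the top max(Ks) items by score, and report recall@K for each K.
    rating = rating.copy()
    rating[train_items] = -np.inf            # never recommend already-seen items
    top = heapq.nlargest(max(Ks), range(len(rating)), key=rating.__getitem__)
    test_set = set(test_items)
    hits = [1.0 if i in test_set else 0.0 for i in top]
    return {K: sum(hits[:K]) / max(len(test_items), 1) for K in Ks}

# Toy usage: 10 items, user trained on item 0, held-out positive is item 3.
print(rank_one_user(np.random.rand(10), [0], [3], Ks=[5]))
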
877 changes: 877 additions & 0 deletions MF/load_data.py

Large diffs are not rendered by default.

931 changes: 931 additions & 0 deletions MF/model_api.py

Large diffs are not rendered by default.

117 changes: 117 additions & 0 deletions MF/parse.py
@@ -0,0 +1,117 @@
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description="Run pop_bias.")
    parser.add_argument('--data_path', nargs='?', default='./data/',  # changed by zyang
                        help='Input data path.')
    parser.add_argument('--dataset', nargs='?', default='kwai',
                        help='Choose a dataset from {movielens_ml_1m, movielens_ml_10m, gowalla}')
    parser.add_argument('--source', nargs='?', default='normal',
                        help='...')  # not used
    parser.add_argument('--train', nargs='?', default='normal',
                        help='normal (MF) | s_condition (PD/PDA) | temp (BPRMF(t)-pop)')
    parser.add_argument('--test', nargs='?', default='normal',
                        help='normal (MF) | s_condition (PD/PDA) | temp (BPRMF(t)-pop)')
    parser.add_argument('--valid_set', nargs='?', default='test',
                        help='test | valid')
    parser.add_argument('--save_dir', nargs='?', default="/data/zyang/save_model/",
                        help='save path')

    parser.add_argument('--alpha', type=float, default=1e-3,  # not used
                        help='alpha')
    parser.add_argument('--beta', type=float, default=1e-3,  # not used
                        help='beta')

    parser.add_argument('--pc_alpha', type=float, default=0.1,  # not used
                        help='alpha')
    parser.add_argument('--pc_beta', type=float, default=0.1,  # not used
                        help='beta')

    parser.add_argument('--exp_init_values', type=float, default=0.1,
                        help='power coefficient initial value')
    parser.add_argument('--pop_exp', type=float, default=0.1,
                        help='popularity power coefficient')  # gamma in the paper
    parser.add_argument('--early_stop', type=int, default=1,
                        help='0: no early stopping, 1: early stopping')
    parser.add_argument('--need_save', type=int, default=1,
                        help='0: do not save model, 1: save model')
    parser.add_argument('--cores', type=int, default=1,
                        help='cores for prefetch')

    parser.add_argument('--verbose', type=int, default=1,
                        help='Interval of evaluation.')
    parser.add_argument('--epoch', type=int, default=400,
                        help='Number of epochs.')
    parser.add_argument('--load_epoch', type=int, default=400,
                        help='Epoch to load, for pretraining.')  # not used
    parser.add_argument('--embed_size', type=int, default=64,
                        help='Embedding size.')
    parser.add_argument('--batch_size', type=int, default=1024,
                        help='Batch size.')
    parser.add_argument('--Ks', nargs='?', default='[20]',
                        help='K values for top-K evaluation.')
    parser.add_argument('--epochs', nargs='?', default='[]',
                        help='Test c on these epochs.')
    parser.add_argument('--regs', type=float, default=1e-5,
                        help='Regularization.')
    parser.add_argument('--fregs', type=float, default=1e-5,
                        help='Fine-tuning regularization.')  # not used
    parser.add_argument('--c', type=float, default=10.0,
                        help='Constant c.')  # not used
    parser.add_argument('--train_c', type=str, default="val",
                        help='val | test')  # not used
    parser.add_argument('--lr', type=float, default=1e-3,
                        help='Learning rate.')
    parser.add_argument('--wd', type=float, default=1e-5,
                        help='Weight decay of optimizer.')  # not used
    parser.add_argument('--model', nargs='?', default='mf',
                        help='Specify model type, choose from {mf, CausalE}')
    parser.add_argument('--skew', type=int, default=0,
                        help='Use skewed dataset or not.')  # not used
    parser.add_argument('--model_type', nargs='?', default='o',
                        help='Specify model type, choose from {o, c, ic, rc, irc}')  # not used
    parser.add_argument('--devide_ratio', type=float, default=0.8,
                        help='Train/Test split ratio.')  # not used
    parser.add_argument('--save_flag', type=int, default=1,
                        help='0: disable model saver, 1: activate model saver')

    parser.add_argument('--pop_used', type=int, default=-2,
                        help='pop_rate used in test')  # not used

    parser.add_argument('--cuda', type=str, default='1',
                        help='Available GPU ID')
    parser.add_argument('--pretrain', type=int, default=0,
                        help='0: no pretraining, 1: load pretrained model')  # not used
    parser.add_argument('--check_c', type=int, default=1,
                        help='0: no checking, 1: check a range of cs')  # not used
    parser.add_argument('--log_interval', type=int, default=10,
                        help='logging interval (in epochs) during training')
    parser.add_argument('--pop_wd', type=float, default=0.,
                        help='weight decay of popularity')  # not used
    parser.add_argument('--base', type=float, default=-1.,
                        help='check range base.')  # not used
    parser.add_argument('--cf_pen', type=float, default=1.0,
                        help='Imbalance loss.')  # not used
    parser.add_argument('--saveID', nargs='?', default='',
                        help='Specify model save path.')
    parser.add_argument('--user_min', type=int, default=1,
                        help='user_min.')  # not used
    parser.add_argument('--user_max', type=int, default=1000,
                        help='max users per class.')  # not used
    parser.add_argument('--data_type', nargs='?', default='ori',
                        help='load imbalanced data or not.')
    parser.add_argument('--imb_type', nargs='?', default='exp',
                        help='imbalance type.')  # not used
    parser.add_argument('--top_ratio', type=float, default=0.1,
                        help='imbalance top ratio.')  # not used
    parser.add_argument('--lam', type=float, default=1.,
                        help='lambda.')  # not used
    parser.add_argument('--check_epoch', nargs='?', default='all',
                        help='check all epochs, a selection, or a range.')  # not used
    parser.add_argument('--start', type=float, default=-1.,
                        help='check c start.')  # not used
    parser.add_argument('--end', type=float, default=1.,
                        help='check c end.')  # not used
    parser.add_argument('--step', type=int, default=20,
                        help='check c step.')  # not used
    parser.add_argument('--out', type=int, default=0)  # not used
    return parser.parse_args()
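
The help strings above map the --train/--test modes to the models in the paper. A hedged example of driving parse_args programmatically; the flag values below are illustrative placeholders, not settings shipped in this commit:

import sys
from parse import parse_args

# Illustrative PD/PDA-style configuration; values are placeholders only.
sys.argv = [
    'prog',
    '--dataset', 'kwai',
    '--train', 's_condition',   # PD/PDA branch (see batch_test.py above)
    '--test', 's_condition',
    '--pop_exp', '0.2',         # gamma in the paper; placeholder value
    '--Ks', '[20,50]',
]
args = parse_args()
Ks = eval(args.Ks)              # batch_test.py turns the string '[20,50]' into a list
print(args.train, args.pop_exp, Ks)  # -> s_condition 0.2 [20, 50]
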
182 changes: 182 additions & 0 deletions MF/sampling.py
@@ -0,0 +1,182 @@
from NeuRec.MF.load_data import Data
import multiprocessing  # needed by multi_sampling below
import numpy as np
from prefetch_generator import background

# NOTE: `data` is assumed to be a module-level Data instance created elsewhere.

@background(max_prefetch=3)
def multi_sampling():
    worker = 10
    pool = multiprocessing.Pool(worker)
    all_users = data.train_user_list.keys()
    sampled_data = pool.map(sampling_one_user, all_users)
    users = []
    pos_items = []
    neg_items = []
    for re in sampled_data:
        users.extend(re['user'])
        pos_items.extend(re['pos'])
        neg_items.extend(re['neg'])  # extend, not append: keep a flat list like users/pos_items
    return users, pos_items, neg_items

def sampling_one_user(u):
    # For each positive item of user u, draw one uniform negative not in u's positives.
    pos_items = data.train_user_list[u]
    N_ps = len(pos_items)
    neg_items = []
    n_items = data.n_items
    for i in range(N_ps):
        one_neg = np.random.randint(n_items)
        while one_neg in pos_items:
            one_neg = np.random.randint(n_items)
        neg_items.append(one_neg)
    users = [u] * N_ps
    return {'user': users, 'pos': pos_items, 'neg': neg_items}


def _batch_sampling(itr, pos_dict, neg_dict, tot_neg, batch_epoch, p_thre, item_max, neg_pro_dict):
    '''
    Subprocess: sample tot_neg negatives per interaction, drawing from the
    user's exposed-negative pool with probability p_thre and uniformly otherwise.
    :param itr: array of interactions; column 0 is the user id
    :param pos_dict: user -> positive items
    :param neg_dict: user -> exposed (observed) negative items
    :param tot_neg: number of negatives per interaction
    :return: [itr, concatenated negatives and exposure flags]
    '''
    neg_items = np.zeros([itr.shape[0], tot_neg])
    expo_flag = np.zeros([itr.shape[0], tot_neg])
    p = np.random.rand(itr.shape[0], tot_neg)
    k = 0
    for x in itr:
        u = x[0]
        try:
            idx1 = np.where(p[k] <= p_thre)[0]   # slots filled from exposed negatives
            idx2 = np.where(p[k] > p_thre)[0]    # slots filled by uniform rejection sampling
            neg_items[k, idx1] = np.random.choice(neg_dict[u], size=idx1.shape[0])
            expo_flag[k, idx1] += 1
            for idx2_i in idx2:
                temp = np.random.randint(item_max)
                while temp in pos_dict[u]:
                    temp = np.random.randint(item_max)
                neg_items[k, idx2_i] = temp

        except:  # user has no exposed negatives: fall back to uniform sampling only
            idx2 = np.arange(tot_neg)
            for idx2_i in idx2:
                temp = np.random.randint(item_max)
                while temp in pos_dict[u]:
                    temp = np.random.randint(item_max)
                neg_items[k, idx2_i] = temp
        k += 1
    expo_flag = expo_flag.reshape(itr.shape[0], batch_epoch, -1)
    neg_items = neg_items.reshape(itr.shape[0], batch_epoch, -1)
    pos_flag = np.ones([expo_flag.shape[0], expo_flag.shape[1], 1])
    expo_flag = np.concatenate([pos_flag, expo_flag], axis=-1)
    return [itr, np.concatenate([neg_items, expo_flag], axis=-1)]

def _batch_sampling2(itr, pos_dict, neg_dict, tot_neg, batch_epoch, p_thre, item_max, neg_pro_dict):
    '''
    Subprocess: like _batch_sampling, but the uniform stage is vectorized
    (over-sample, then reject positives) instead of item-by-item rejection.
    :param itr: array of interactions; column 0 is the user id
    :param pos_dict: user -> positive items
    :param neg_dict: user -> exposed (observed) negative items
    :param tot_neg: number of negatives per interaction
    :return: [itr, concatenated negatives and exposure flags]
    '''
    neg_items = np.zeros([itr.shape[0], tot_neg])
    expo_flag = np.zeros([itr.shape[0], tot_neg])
    p = np.random.rand(itr.shape[0], tot_neg)
    k = 0
    for x in itr:
        u = x[0]
        pos_item = pos_dict[u]
        try:
            neg_item_u = neg_dict[u]
            idx1 = np.where(p[k] <= p_thre)[0]
            idx2 = np.where(p[k] > p_thre)[0]
        except:  # user has no exposed negatives
            idx1 = None
            idx2 = np.arange(tot_neg)
        if idx1 is not None and idx1.shape[0] > 0:
            neg_items[k, idx1] = np.random.choice(neg_item_u, size=idx1.shape[0])
            expo_flag[k, idx1] += 1
        if idx2.shape[0] > 0:
            l2 = idx2.shape[0]
            tmp = np.random.randint(item_max, size=l2 * 5)  # over-sample 5x the items needed
            tmp = np.setdiff1d(tmp, pos_item, assume_unique=True)
            if tmp.shape[0] >= l2:  # sampled enough
                neg_items[k, idx2] = tmp[:l2]
            else:  # not enough: over-sample 10x
                tmp = np.random.randint(item_max, size=l2 * 10)
                tmp = np.setdiff1d(tmp, pos_item, assume_unique=True)
                l_t = min(tmp.shape[0], l2)
                idx2_t = idx2[:l_t]
                neg_items[k, idx2_t] = tmp[:l_t]  # keep those not in positives
                for idx2_i in idx2[l_t:]:  # fill the remainder one by one
                    temp = np.random.randint(item_max)
                    while temp in pos_item:
                        temp = np.random.randint(item_max)
                    neg_items[k, idx2_i] = temp
        k += 1
    expo_flag = expo_flag.reshape(itr.shape[0], batch_epoch, -1)
    neg_items = neg_items.reshape(itr.shape[0], batch_epoch, -1)
    pos_flag = np.ones([expo_flag.shape[0], expo_flag.shape[1], 1])
    expo_flag = np.concatenate([pos_flag, expo_flag], axis=-1)
    return [itr, np.concatenate([neg_items, expo_flag], axis=-1)]

def _batch_sampling3(itr, pos_dict, neg_dict, tot_neg, batch_epoch, p_thre, item_max, neg_pro_dict):
    '''
    Subprocess: in the random-sampling stage, additionally ensure that sampled
    items do not come from the user's negative (exposed) interactions.
    :param itr: array of interactions; column 0 is the user id
    :param pos_dict: user -> positive items
    :param neg_dict: user -> exposed (observed) negative items
    :param tot_neg: number of negatives per interaction
    :param neg_pro_dict: sampling probabilities for items in neg_dict
    :return: [itr, concatenated negatives and exposure flags]
    '''
    neg_items = np.zeros([itr.shape[0], tot_neg])
    expo_flag = np.zeros([itr.shape[0], tot_neg])
    p = np.random.rand(itr.shape[0], tot_neg)
    k = 0
    for x in itr:
        u = x[0]
        pos_item = pos_dict[u]
        try:
            neg_item_u = neg_dict[u]
            if neg_pro_dict is not None:
                neg_item_p = neg_pro_dict[u]
            else:
                neg_item_p = None
            idx1 = np.where(p[k] <= p_thre)[0]
            idx2 = np.where(p[k] > p_thre)[0]
        except:  # user has no exposed negatives
            idx1 = None
            neg_item_u = None
            idx2 = np.arange(tot_neg)
        if idx1 is not None and idx1.shape[0] > 0:
            neg_items[k, idx1] = np.random.choice(neg_item_u, size=idx1.shape[0], p=neg_item_p)
            expo_flag[k, idx1] += 1
        if idx2.shape[0] > 0:
            l2 = idx2.shape[0]
            tmp = np.random.randint(0, item_max, size=l2 * 10)  # over-sample 10x the items needed
            if neg_item_u is not None:
                itr_items = np.concatenate([pos_item, neg_item_u], axis=0)  # exclude both pools
            else:
                itr_items = pos_item
            tmp = np.setdiff1d(tmp, itr_items, assume_unique=True)
            if tmp.shape[0] >= l2:  # sampled enough
                neg_items[k, idx2] = tmp[:l2]
            else:  # not enough: over-sample 20x
                tmp = np.random.randint(0, item_max, size=l2 * 20)
                tmp = np.setdiff1d(tmp, itr_items, assume_unique=True)
                l_t = min(tmp.shape[0], l2)
                idx2_t = idx2[:l_t]
                neg_items[k, idx2_t] = tmp[:l_t]  # keep those not interacted with
                for idx2_i in idx2[l_t:]:  # fill the remainder one by one
                    temp = np.random.randint(item_max)
                    while temp in itr_items:
                        temp = np.random.randint(item_max)
                    neg_items[k, idx2_i] = temp
        k += 1
    expo_flag = expo_flag.reshape(itr.shape[0], batch_epoch, -1)
    neg_items = neg_items.reshape(itr.shape[0], batch_epoch, -1)
    pos_flag = np.ones([expo_flag.shape[0], expo_flag.shape[1], 1])
    expo_flag = np.concatenate([pos_flag, expo_flag], axis=-1)
    return [itr, np.concatenate([neg_items, expo_flag], axis=-1)]
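
The three _batch_sampling variants share one idea: with probability p_thre a negative is drawn from the user's exposed-but-unclicked items (and marked in expo_flag), otherwise uniformly from the catalog with the user's positives rejected. A self-contained toy sketch of that split; all names, sizes, and values below are illustrative, not from the commit:

import numpy as np

rng = np.random.default_rng(0)
item_max, tot_neg, p_thre = 100, 8, 0.3          # toy sizes; placeholders
pos_items = np.array([1, 5, 9])                  # items the user clicked
exposed_negs = np.array([2, 7, 11, 23])          # shown but not clicked

p = rng.random(tot_neg)
idx1 = np.where(p <= p_thre)[0]                  # slots served from exposed negatives
idx2 = np.where(p > p_thre)[0]                   # slots served by uniform rejection sampling

neg = np.zeros(tot_neg, dtype=int)
flag = np.zeros(tot_neg, dtype=int)
neg[idx1] = rng.choice(exposed_negs, size=idx1.shape[0])
flag[idx1] = 1                                   # mark exposure-based negatives
for i in idx2:
    cand = rng.integers(item_max)
    while cand in pos_items:
        cand = rng.integers(item_max)
    neg[i] = cand

print(neg, flag)                                 # negatives plus their exposure flags
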
