Skip to content

Commit

Permalink
[KG] Disable filter in evaluation (dmlc#1162)
Browse files (browse the repository at this point in the history)
* add no_eval_filter

* fix eval.
  • Loading branch information
zheng-da authored and classicsong committed Jan 3, 2020
1 parent 7451bb2 commit 346bc23
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 12 deletions.
19 changes: 12 additions & 7 deletions apps/kg/dataloader/sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,8 @@ def create_neg_subgraph(pos_g, neg_g, is_pbg, neg_head, num_nodes):
neg_sample_size, neg_head)

class EvalSampler(object):
def __init__(self, g, edges, batch_size, neg_sample_size, mode, num_workers):
def __init__(self, g, edges, batch_size, neg_sample_size, mode, num_workers,
filter_false_neg):
EdgeSampler = getattr(dgl.contrib.sampling, 'EdgeSampler')
self.sampler = EdgeSampler(g,
batch_size=batch_size,
Expand All @@ -177,27 +178,30 @@ def __init__(self, g, edges, batch_size, neg_sample_size, mode, num_workers):
shuffle=False,
exclude_positive=False,
relations=g.edata['id'],
return_false_neg=True)
return_false_neg=filter_false_neg)
self.sampler_iter = iter(self.sampler)
self.mode = mode
self.neg_head = 'head' in mode
self.g = g
self.filter_false_neg = filter_false_neg

def __iter__(self):
return self

def __next__(self):
while True:
pos_g, neg_g = next(self.sampler_iter)
neg_positive = neg_g.edata['false_neg']
if self.filter_false_neg:
neg_positive = neg_g.edata['false_neg']
neg_g = create_neg_subgraph(pos_g, neg_g, 'PBG' in self.mode,
self.neg_head, self.g.number_of_nodes())
if neg_g is not None:
break

pos_g.copy_from_parent()
neg_g.copy_from_parent()
neg_g.edata['bias'] = F.astype(-neg_positive, F.float32)
if self.filter_false_neg:
neg_g.edata['bias'] = F.astype(-neg_positive, F.float32)
return pos_g, neg_g

def reset(self):
Expand Down Expand Up @@ -276,14 +280,15 @@ def check(self, eval_type):
np.testing.assert_equal(F.asnumpy(dst_id), orig_dst)
np.testing.assert_equal(F.asnumpy(etype), orig_etype)

def create_sampler(self, eval_type, batch_size, neg_sample_size, mode='head',
num_workers=5, rank=0, ranks=1):
def create_sampler(self, eval_type, batch_size, neg_sample_size,
filter_false_neg, mode='head', num_workers=5, rank=0, ranks=1):
edges = self.get_edges(eval_type)
beg = edges.shape[0] * rank // ranks
end = min(edges.shape[0] * (rank + 1) // ranks, edges.shape[0])
edges = edges[beg: end]
print("eval on {} edges".format(len(edges)))
return EvalSampler(self.g, edges, batch_size, neg_sample_size, mode, num_workers)
return EvalSampler(self.g, edges, batch_size, neg_sample_size,
mode, num_workers, filter_false_neg)

class NewBidirectionalOneShotIterator:
def __init__(self, dataloader_head, dataloader_tail, is_pbg, num_nodes):
Expand Down
9 changes: 8 additions & 1 deletion apps/kg/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def __init__(self):
super(ArgParser, self).__init__()

self.add_argument('--model_name', default='TransE',
choices=['TransE', 'TransH', 'TransR', 'TransD',
choices=['TransE', 'TransE_l1', 'TransE_l2', 'TransH', 'TransR', 'TransD',
'RESCAL', 'DistMult', 'ComplEx', 'RotatE', 'pRotatE'],
help='model to use')
self.add_argument('--data_path', type=str, default='data',
Expand All @@ -44,6 +44,8 @@ def __init__(self):
help='margin value')
self.add_argument('--eval_percent', type=float, default=1,
help='sample some percentage for evaluation.')
self.add_argument('--no_eval_filter', action='store_true',
help='do not filter positive edges among negative edges for evaluation')

self.add_argument('--gpu', type=int, default=-1,
help='use GPU')
Expand Down Expand Up @@ -99,17 +101,20 @@ def main(args):
args.neg_sample_size_test = args.neg_sample_size
if args.neg_sample_size < 0:
args.neg_sample_size_test = args.neg_sample_size = eval_dataset.g.number_of_nodes()
args.eval_filter = not args.no_eval_filter
if args.num_proc > 1:
test_sampler_tails = []
test_sampler_heads = []
for i in range(args.num_proc):
test_sampler_head = eval_dataset.create_sampler('test', args.batch_size,
args.neg_sample_size,
args.eval_filter,
mode='PBG-head',
num_workers=args.num_worker,
rank=i, ranks=args.num_proc)
test_sampler_tail = eval_dataset.create_sampler('test', args.batch_size,
args.neg_sample_size,
args.eval_filter,
mode='PBG-tail',
num_workers=args.num_worker,
rank=i, ranks=args.num_proc)
Expand All @@ -118,11 +123,13 @@ def main(args):
else:
test_sampler_head = eval_dataset.create_sampler('test', args.batch_size,
args.neg_sample_size,
args.eval_filter,
mode='PBG-head',
num_workers=args.num_worker,
rank=0, ranks=1)
test_sampler_tail = eval_dataset.create_sampler('test', args.batch_size,
args.neg_sample_size,
args.eval_filter,
mode='PBG-tail',
num_workers=args.num_worker,
rank=0, ranks=1)
Expand Down
9 changes: 5 additions & 4 deletions apps/kg/models/general_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,11 @@ def forward_test(self, pos_g, neg_g, logs, gpu_id=-1):
neg_scores = reshape(logsigmoid(neg_scores), batch_size, -1)

# We need to filter the positive edges in the negative graph.
filter_bias = reshape(neg_g.edata['bias'], batch_size, -1)
if self.args.gpu >= 0:
filter_bias = cuda(filter_bias, self.args.gpu)
neg_scores += filter_bias
if self.args.eval_filter:
filter_bias = reshape(neg_g.edata['bias'], batch_size, -1)
if self.args.gpu >= 0:
filter_bias = cuda(filter_bias, self.args.gpu)
neg_scores += filter_bias
# To compute the rank of a positive edge among all negative edges,
# we need to know how many negative edges have higher scores than
# the positive edge.
Expand Down
11 changes: 11 additions & 0 deletions apps/kg/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ def __init__(self):
help='margin value')
self.add_argument('--eval_percent', type=float, default=1,
help='sample some percentage for evaluation.')
self.add_argument('--no_eval_filter', action='store_true',
help='do not filter positive edges among negative edges for evaluation')

self.add_argument('--gpu', type=int, default=-1,
help='use GPU')
Expand Down Expand Up @@ -135,6 +137,7 @@ def run(args, logger):
n_relations = dataset.n_relations
if args.neg_sample_size_test < 0:
args.neg_sample_size_test = n_entities
args.eval_filter = not args.no_eval_filter

train_data = TrainDataset(dataset, args, ranks=args.num_proc)
if args.num_proc > 1:
Expand Down Expand Up @@ -179,11 +182,13 @@ def run(args, logger):
for i in range(args.num_proc):
valid_sampler_head = eval_dataset.create_sampler('valid', args.batch_size_eval,
args.neg_sample_size_valid,
args.eval_filter,
mode='PBG-head',
num_workers=args.num_worker,
rank=i, ranks=args.num_proc)
valid_sampler_tail = eval_dataset.create_sampler('valid', args.batch_size_eval,
args.neg_sample_size_valid,
args.eval_filter,
mode='PBG-tail',
num_workers=args.num_worker,
rank=i, ranks=args.num_proc)
Expand All @@ -192,11 +197,13 @@ def run(args, logger):
else:
valid_sampler_head = eval_dataset.create_sampler('valid', args.batch_size_eval,
args.neg_sample_size_valid,
args.eval_filter,
mode='PBG-head',
num_workers=args.num_worker,
rank=0, ranks=1)
valid_sampler_tail = eval_dataset.create_sampler('valid', args.batch_size_eval,
args.neg_sample_size_valid,
args.eval_filter,
mode='PBG-tail',
num_workers=args.num_worker,
rank=0, ranks=1)
Expand All @@ -209,11 +216,13 @@ def run(args, logger):
for i in range(args.num_proc):
test_sampler_head = eval_dataset.create_sampler('test', args.batch_size_eval,
args.neg_sample_size_test,
args.eval_filter,
mode='PBG-head',
num_workers=args.num_worker,
rank=i, ranks=args.num_proc)
test_sampler_tail = eval_dataset.create_sampler('test', args.batch_size_eval,
args.neg_sample_size_test,
args.eval_filter,
mode='PBG-tail',
num_workers=args.num_worker,
rank=i, ranks=args.num_proc)
Expand All @@ -222,11 +231,13 @@ def run(args, logger):
else:
test_sampler_head = eval_dataset.create_sampler('test', args.batch_size_eval,
args.neg_sample_size_test,
args.eval_filter,
mode='PBG-head',
num_workers=args.num_worker,
rank=0, ranks=1)
test_sampler_tail = eval_dataset.create_sampler('test', args.batch_size_eval,
args.neg_sample_size_test,
args.eval_filter,
mode='PBG-tail',
num_workers=args.num_worker,
rank=0, ranks=1)
Expand Down

0 comments on commit 346bc23

Please sign in to comment.