Skip to content

Commit

Permalink
Add API benchmarks (dmlc#2522)
Browse files Browse the repository at this point in the history
* add bench jenkins

* instance type

* fix

* fix

* fix

* 111

* test

* 111

* 111

* fix

* test

* run

* fix

* fix

* fix

* fix

* fix

* publish results

* 111

* regression

* launch ec2 script

* fix

* add

* run on master

* change

* rrr

* run gpu

* fix

* fix

* try fix

* fix

* ff

* fix

* fix

* fix

* refactor

* fix

* fix

* update

* fix

* fix

* fix

* fix

* remove import torchtext

* add shm size

* update

* fix

* fix

* fix

* fix

* fix this!!!!

* 111

* fix

* remove verbose

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* update readme

* fix

* fix

* fix

* change asv default to head

* commit sage and rgcn

* fix

* update

* add benchmarks

* add

* fix

* update

* remove RandomState

* tmp remove

Co-authored-by: Minjie Wang <[email protected]>
  • Loading branch information
VoVAllen and jermainewang authored Jan 14, 2021
1 parent 0778766 commit c29daae
Show file tree
Hide file tree
Showing 6 changed files with 250 additions and 11 deletions.
37 changes: 37 additions & 0 deletions benchmarks/benchmarks/api/bench_edge_ids.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import time
import dgl
import torch
import numpy as np

from .. import utils
# NOTE(review): an earlier comment here stated "edge_ids is not supported on
# cuda" and the benchmark was skipped on GPU via the decorator below. The skip
# is disabled and GPU graphs are parametrized, so confirm edge_ids works on CUDA.
# @utils.skip_if_gpu()
@utils.benchmark('time', timeout=1200)
@utils.parametrize_cpu('graph_name', ['cora', 'livejournal', 'friendster'])
@utils.parametrize_gpu('graph_name', ['cora', 'livejournal'])
# NOTE(review): the original comment on this line read "csr/csc is not
# supported", which contradicts benchmarking 'csr' -- confirm which formats
# edge_ids actually supports.
@utils.parametrize('format', ['csr'])
@utils.parametrize('fraction', [0.01, 0.1])
@utils.parametrize('return_uv', [True, False])
def track_time(graph_name, format, fraction, return_uv):
    """Benchmark ``graph.edge_ids`` on a random sample of existing edges.

    Parameters
    ----------
    graph_name : str
        Dataset name passed to ``utils.get_graph``.
    format : str
        Sparse format requested for the benchmarked graph.
    fraction : float
        Sample size as a fraction of the number of edges.
    return_uv : bool
        Forwarded to ``graph.edge_ids`` in the timed calls.

    Returns
    -------
    float
        Average elapsed seconds per ``edge_ids`` call over 10 calls.
    """
    num_runs = 10
    device = utils.get_bench_device()
    graph = utils.get_graph(graph_name, format)
    # A COO copy is only used to translate sampled edge IDs into (u, v) pairs.
    coo_graph = utils.get_graph(graph_name, 'coo')
    graph = graph.to(device)

    num_edges = graph.num_edges()
    eids = np.random.choice(
        np.arange(num_edges, dtype=np.int64), int(num_edges * fraction))
    eids = torch.tensor(eids, device="cpu", dtype=torch.int64)
    u, v = coo_graph.find_edges(eids)
    # Release the helper graph and the ID tensor before the timed section.
    del coo_graph, eids
    u = u.to(device)
    v = v.to(device)

    # dry run
    for _ in range(num_runs):
        graph.edge_ids(u[0], v[0])

    # timing
    # perf_counter is monotonic and high resolution; time.time() can jump when
    # the system clock is adjusted.
    # NOTE(review): if device work is asynchronous on GPU, a synchronization
    # may be needed before reading the clock -- confirm.
    t0 = time.perf_counter()
    for _ in range(num_runs):
        graph.edge_ids(u, v, return_uv=return_uv)
    t1 = time.perf_counter()

    return (t1 - t0) / num_runs
33 changes: 33 additions & 0 deletions benchmarks/benchmarks/api/bench_find_edges.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import time
import dgl
import torch
import numpy as np

from .. import utils


@utils.benchmark('time', timeout=1200)
@utils.parametrize_cpu('graph_name', ['cora', 'livejournal', 'friendster'])
@utils.parametrize_gpu('graph_name', ['cora', 'livejournal'])
@utils.parametrize('format', ['coo'])  # csc is not supported
@utils.parametrize('fraction', [0.01, 0.1])
def track_time(graph_name, format, fraction):
    """Benchmark ``graph.find_edges`` on a random sample of edge IDs.

    Parameters
    ----------
    graph_name : str
        Dataset name passed to ``utils.get_graph``.
    format : str
        Sparse format requested for the benchmarked graph.
    fraction : float
        Sample size as a fraction of the number of edges.

    Returns
    -------
    float
        Average elapsed seconds per ``find_edges`` call over 10 calls.
    """
    num_runs = 10
    device = utils.get_bench_device()
    graph = utils.get_graph(graph_name, format)
    graph = graph.to(device)

    num_edges = graph.num_edges()
    eids = np.random.choice(
        np.arange(num_edges, dtype=np.int64), int(num_edges * fraction))
    eids = torch.tensor(eids, device=device, dtype=torch.int64)

    # dry run: exercise both the scalar and the tensor input paths
    # (the tensor is empty on the first iteration, since arange(0) is empty).
    for i in range(num_runs):
        graph.find_edges(i)
        graph.find_edges(torch.arange(
            i * 10, dtype=torch.int64, device=device))

    # timing
    # perf_counter is monotonic and high resolution; time.time() can jump when
    # the system clock is adjusted.
    # NOTE(review): if device work is asynchronous on GPU, a synchronization
    # may be needed before reading the clock -- confirm.
    t0 = time.perf_counter()
    for _ in range(num_runs):
        graph.find_edges(eids)
    t1 = time.perf_counter()

    return (t1 - t0) / num_runs
31 changes: 31 additions & 0 deletions benchmarks/benchmarks/api/bench_format_conversion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import time
import dgl
import torch
import numpy as np

from .. import utils


@utils.benchmark('time', timeout=1200)
@utils.parametrize_cpu('graph_name', ['cora', 'livejournal', 'friendster'])
@utils.parametrize_gpu('graph_name', ['cora', 'livejournal'])
@utils.parametrize('format',
                   [('coo', 'csc'), ('csc', 'coo'),
                    ('coo', 'csr'), ('csr', 'coo'),
                    ('csr', 'csc'), ('csc', 'csr')])
def track_time(graph_name, format):
    """Benchmark converting a graph between two sparse formats.

    Parameters
    ----------
    graph_name : str
        Dataset name passed to ``utils.get_graph``.
    format : tuple of (str, str)
        ``(from_format, to_format)`` pair describing the conversion to time.

    Returns
    -------
    float
        Average elapsed seconds per ``graph.formats([to_format])`` call over
        10 calls.
    """
    num_runs = 10
    from_format, to_format = format
    device = utils.get_bench_device()
    graph = utils.get_graph(graph_name, from_format)
    graph = graph.to(device)
    # Presumably pins the graph to the source format so that every timed call
    # performs a real conversion -- confirm against DGL's ``formats`` semantics.
    graph = graph.formats([from_format])

    # dry run
    graph.formats([to_format])

    # timing
    # perf_counter is monotonic and high resolution; time.time() can jump when
    # the system clock is adjusted.
    # NOTE(review): if device work is asynchronous on GPU, a synchronization
    # may be needed before reading the clock -- confirm.
    t0 = time.perf_counter()
    for _ in range(num_runs):
        graph.formats([to_format])
    t1 = time.perf_counter()

    return (t1 - t0) / num_runs
34 changes: 34 additions & 0 deletions benchmarks/benchmarks/api/bench_in_degrees.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import time
import dgl
import torch
import numpy as np

from .. import utils


@utils.benchmark('time', timeout=1200)
@utils.parametrize_cpu('graph_name', ['cora', 'livejournal', 'friendster'])
@utils.parametrize_gpu('graph_name', ['cora', 'livejournal'])
# in_degrees on coo is not supported on cuda
@utils.parametrize_cpu('format', ['coo', 'csc'])
@utils.parametrize_gpu('format', ['csc'])
@utils.parametrize('fraction', [0.01, 0.1])
def track_time(graph_name, format, fraction):
    """Benchmark ``graph.in_degrees`` on a random sample of node IDs.

    Parameters
    ----------
    graph_name : str
        Dataset name passed to ``utils.get_graph``.
    format : str
        Sparse format requested for the benchmarked graph.
    fraction : float
        Sample size as a fraction of the number of nodes.

    Returns
    -------
    float
        Average elapsed seconds per ``in_degrees`` call over 10 calls.
    """
    num_runs = 10
    device = utils.get_bench_device()
    graph = utils.get_graph(graph_name, format)
    graph = graph.to(device)

    num_nodes = graph.num_nodes()
    nids = np.random.choice(
        np.arange(num_nodes, dtype=np.int64), int(num_nodes * fraction))
    nids = torch.tensor(nids, device=device, dtype=torch.int64)

    # dry run: query single node IDs 0..9
    for i in range(num_runs):
        graph.in_degrees(i)

    # timing
    # perf_counter is monotonic and high resolution; time.time() can jump when
    # the system clock is adjusted.
    # NOTE(review): if device work is asynchronous on GPU, a synchronization
    # may be needed before reading the clock -- confirm.
    t0 = time.perf_counter()
    for _ in range(num_runs):
        graph.in_degrees(nids)
    t1 = time.perf_counter()

    return (t1 - t0) / num_runs
34 changes: 34 additions & 0 deletions benchmarks/benchmarks/api/bench_in_edges.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import time
import dgl
import torch
import numpy as np

from .. import utils


@utils.benchmark('time', timeout=1200)
@utils.parametrize_cpu('graph_name', ['cora', 'livejournal', 'friendster'])
@utils.parametrize_gpu('graph_name', ['cora', 'livejournal'])
# in_edges on coo is not supported on cuda
@utils.parametrize_cpu('format', ['coo', 'csc'])
@utils.parametrize_gpu('format', ['csc'])
@utils.parametrize('fraction', [0.01, 0.1])
def track_time(graph_name, format, fraction):
    """Benchmark ``graph.in_edges`` on a random sample of node IDs.

    Parameters
    ----------
    graph_name : str
        Dataset name passed to ``utils.get_graph``.
    format : str
        Sparse format requested for the benchmarked graph.
    fraction : float
        Sample size as a fraction of the number of nodes.

    Returns
    -------
    float
        Average elapsed seconds per ``in_edges`` call over 10 calls.
    """
    num_runs = 10
    device = utils.get_bench_device()
    graph = utils.get_graph(graph_name, format)
    graph = graph.to(device)

    num_nodes = graph.num_nodes()
    nids = np.random.choice(
        np.arange(num_nodes, dtype=np.int64), int(num_nodes * fraction))
    nids = torch.tensor(nids, device=device, dtype=torch.int64)

    # dry run: query single node IDs 0..9
    for i in range(num_runs):
        graph.in_edges(i)

    # timing
    # perf_counter is monotonic and high resolution; time.time() can jump when
    # the system clock is adjusted.
    # NOTE(review): if device work is asynchronous on GPU, a synchronization
    # may be needed before reading the clock -- confirm.
    t0 = time.perf_counter()
    for _ in range(num_runs):
        graph.in_edges(nids)
    t1 = time.perf_counter()

    return (t1 - t0) / num_runs
92 changes: 81 additions & 11 deletions benchmarks/benchmarks/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,36 +26,70 @@ def _download(url, path, filename):
print('Download finished.')


def _load_or_build_graph(bin_path, build_fn):
    """Return the graph cached at ``bin_path``, building and caching it on a miss.

    ``build_fn`` is a zero-argument callable that constructs the graph; it is
    only invoked when no cache file exists.
    """
    if os.path.exists(bin_path):
        g_list, _ = dgl.load_graphs(bin_path)
        return g_list[0]
    g = build_fn()
    # Nothing in this module is guaranteed to have created the cache directory
    # (e.g. the reddit dataset is not fetched into it), so create it first.
    os.makedirs(os.path.dirname(bin_path), exist_ok=True)
    dgl.save_graphs(bin_path, [g])
    return g


def get_graph(name, format):
    """Load a benchmark graph by name, restricted to one sparse format.

    Parameters
    ----------
    name : str
        One of ``'cora'``, ``'livejournal'``, ``'friendster'`` or ``'reddit'``.
    format : str
        Sparse format name passed to ``DGLGraph.formats`` (the benchmarks in
        this repository use ``'coo'``, ``'csr'`` and ``'csc'``).

    Returns
    -------
    DGLGraph
        The graph after ``formats([format])``.

    Raises
    ------
    Exception
        If ``name`` is not a known dataset.

    Notes
    -----
    All datasets except cora are cached per format under ``/tmp/dataset`` so
    that later runs load the binary instead of rebuilding the graph.
    """
    if name == 'cora':
        g = dgl.data.CoraGraphDataset()[0]
    elif name == 'livejournal':
        g = _load_or_build_graph(
            "/tmp/dataset/livejournal/livejournal_{}.bin".format(format),
            lambda: get_livejournal().formats([format]))
    elif name == "friendster":
        g = _load_or_build_graph(
            "/tmp/dataset/friendster/friendster_{}.bin".format(format),
            lambda: get_friendster().formats([format]))
    elif name == "reddit":
        g = _load_or_build_graph(
            "/tmp/dataset/reddit/reddit_{}.bin".format(format),
            lambda: dgl.data.RedditDataset(self_loop=True)[0].formats([format]))
    else:
        raise Exception("Unknown dataset: {}".format(name))
    # Applied unconditionally so cora and cache hits also honour ``format``.
    return g.formats([format])


def get_livejournal():
    """Download the LiveJournal edge list and build a DGL graph from it.

    The archive is fetched into ``/tmp/dataset/livejournal`` through
    ``_download`` and parsed as a tab-separated ``src``/``dst`` edge list.

    Returns
    -------
    DGLGraph
        Graph constructed by ``dgl.graph((src, dst))``.
    """
    # Same as https://snap.stanford.edu/data/soc-LiveJournal1.txt.gz
    data_dir = '/tmp/dataset/livejournal'
    filename = 'soc-LiveJournal1.txt.gz'
    _download('https://dgl-asv-data.s3-us-west-2.amazonaws.com/dataset/livejournal/soc-LiveJournal1.txt.gz',
              data_dir, filename)
    # Build the path once so the download target and the file that is read
    # cannot drift apart.
    # skiprows=4 skips the first four lines (presumably the SNAP header
    # comments -- confirm against the file).
    df = pandas.read_csv(os.path.join(data_dir, filename), sep='\t', skiprows=4, header=None,
                         names=['src', 'dst'], compression='gzip')
    src = df['src'].values
    dst = df['dst'].values
    print('construct the graph')
    return dgl.graph((src, dst))


def get_friendster():
    """Download the Friendster edge list and build a DGL graph from it.

    The archive is fetched into ``/tmp/dataset/friendster`` through
    ``_download`` and parsed as a tab-separated ``src``/``dst`` edge list.

    Returns
    -------
    DGLGraph
        Graph constructed by ``dgl.graph((src, dst))``.
    """
    # Same as https://snap.stanford.edu/data/bigdata/communities/com-friendster.ungraph.txt.gz
    data_dir = '/tmp/dataset/friendster'
    filename = 'com-friendster.ungraph.txt.gz'
    _download('https://dgl-asv-data.s3-us-west-2.amazonaws.com/dataset/friendster/com-friendster.ungraph.txt.gz',
              data_dir, filename)
    # Build the path once so the download target and the file that is read
    # cannot drift apart.
    # skiprows=4 skips the first four lines (presumably the SNAP header
    # comments -- confirm against the file).
    df = pandas.read_csv(os.path.join(data_dir, filename), sep='\t', skiprows=4, header=None,
                         names=['src', 'dst'], compression='gzip')
    src = df['src'].values
    dst = df['dst'].values
    print('construct the graph')
    return dgl.graph((src, dst))


# NOTE: the former single-argument ``get_graph(name)`` is superseded by
# ``get_graph(name, format)`` defined earlier in this module. It is kept only
# as a commented-out reference: a second live definition at this point would
# shadow the two-argument version that the benchmarks call.
# def get_graph(name):
#     if name == 'livejournal':
#         return get_livejournal()
#     else:
#         print(name + " doesn't exist")
#         return None


class OGBDataset(object):
Expand All @@ -79,6 +113,7 @@ def predict_category(self):
def __getitem__(self, idx):
return self._g


def load_ogb_product():
name = 'ogbn-products'
from ogb.nodeproppred import DglNodePropPredDataset
Expand Down Expand Up @@ -111,6 +146,7 @@ def load_ogb_product():

return OGBDataset(graph, num_labels)


def load_ogb_mag():
name = 'ogbn-mag'
from ogb.nodeproppred import DglNodePropPredDataset
Expand Down Expand Up @@ -146,6 +182,7 @@ def load_ogb_mag():
num_classes = dataset.num_classes
return OGBDataset(hg, num_classes, 'paper')


class PinsageDataset:
def __init__(self, g, user_ntype, item_ntype, textset):
self._g = g
Expand Down Expand Up @@ -334,6 +371,14 @@ def _wrapper(func):
return _wrapper


def noop_decorator(param_name, params):
    """Return a decorator that hands back the decorated function unchanged.

    ``param_name`` and ``params`` are accepted and ignored.
    """
    return lambda func: func


class TestFilter:
def __init__(self):
self.conf = None
Expand Down Expand Up @@ -367,6 +412,31 @@ def check(self, func):
filter = TestFilter()


# Benchmark target, read once at import time; defaults to CPU when unset.
device = os.environ.get('DGL_BENCH_DEVICE', 'cpu')

if device not in ("cpu", "gpu"):
    raise Exception("Unknown device")

# Only the decorator matching the active device parametrizes the benchmark;
# the other one is bound to the no-op decorator.
if device == "gpu":
    parametrize_cpu, parametrize_gpu = noop_decorator, parametrize
else:
    parametrize_cpu, parametrize_gpu = parametrize, noop_decorator


def skip_if_gpu():
    """Build a decorator that renames a benchmark when running on GPU.

    ``DGL_BENCH_DEVICE`` is read when ``skip_if_gpu()`` is called (default
    ``'cpu'``). If it equals ``'gpu'``, the decorated function gets a
    ``benchmark_name`` attribute of ``"skip_" + func.__name__``; otherwise the
    function is returned untouched.
    """
    running_on_gpu = os.environ.get('DGL_BENCH_DEVICE', 'cpu') == "gpu"

    def _wrapper(func):
        if not running_on_gpu:
            return func
        # skip if not enabled
        func.benchmark_name = "skip_" + func.__name__
        return func
    return _wrapper


def benchmark(track_type, timeout=60):
"""Decorator for indicating the benchmark type.
Expand Down

0 comments on commit c29daae

Please sign in to comment.