Skip to content

Commit

Permalink
[Test] API benchmarks (dmlc#2542)
Browse files Browse the repository at this point in the history
* add bench jenkins

* instance type

* fix

* fix

* fix

* 111

* test

* 111

* 111

* fix

* test

* run

* fix

* fix

* fix

* fix

* fix

* publish results

* 111

* regression

* launch ec2 script

* fix

* add

* run on master

* change

* rrr

* run gpu

* fix

* fix

* try fix

* fix

* ff

* fix

* fix

* fix

* refactor

* fix

* fix

* update

* fix

* fix

* fix

* fix

* remove import torchtext

* add shm size

* update

* fix

* fix

* fix

* fix

* fix this!!!!

* 111

* fix

* remove verbose

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* update readme

* fix

* fix

* fix

* change asv default to head

* commit sage and rgcn

* fix

* update

* add benchmarks

* add

* fix

* update

* remove RandomState

* tmp remove

* new batch

* fix

* fix

* fix

* address comment

* fix warning

* fix

Co-authored-by: Minjie Wang <[email protected]>
  • Loading branch information
VoVAllen and jermainewang authored Jan 20, 2021
1 parent 8a2b54d commit 0c15657
Show file tree
Hide file tree
Showing 20 changed files with 219 additions and 17 deletions.
11 changes: 5 additions & 6 deletions benchmarks/benchmarks/api/bench_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,15 @@
from .. import utils

@utils.benchmark('time')
@utils.parametrize('batch_size', [4, 32, 256])
@utils.parametrize('batch_size', [4, 32, 256, 1024])
def track_time(batch_size):
device = utils.get_bench_device()

ds = dgl.data.QM7bDataset()
# prepare graph
graphs = []
for i in range(batch_size):
u = torch.randint(20, (40,))
v = torch.randint(20, (40,))
graphs.append(dgl.graph((u, v)).to(device))
for graph in ds[0:batch_size][0]:
g = graph.to(device)
graphs.append(g)

# dry run
for i in range(10):
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/benchmarks/api/bench_edge_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
@utils.benchmark('time', timeout=1200)
@utils.parametrize_cpu('graph_name', ['cora', 'livejournal', 'friendster'])
@utils.parametrize_gpu('graph_name', ['cora', 'livejournal'])
@utils.parametrize('format', ['csr']) # csr/csc is not supported
@utils.parametrize('format', ['coo', 'csr', 'csc'])
@utils.parametrize('fraction', [0.01, 0.1])
@utils.parametrize('return_uv', [True, False])
def track_time(graph_name, format, fraction, return_uv):
Expand Down
1 change: 1 addition & 0 deletions benchmarks/benchmarks/api/bench_in_edges.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
def track_time(graph_name, format, fraction):
device = utils.get_bench_device()
graph = utils.get_graph(graph_name, format)

graph = graph.to(device)
nids = np.random.choice(
np.arange(graph.num_nodes(), dtype=np.int64), int(graph.num_nodes()*fraction))
Expand Down
27 changes: 27 additions & 0 deletions benchmarks/benchmarks/api/bench_khop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import time
import dgl
import torch
import numpy as np

from .. import utils


@utils.benchmark('time', timeout=60)
@utils.parametrize('graph_name', ['cora'])
@utils.parametrize('format', ['coo', 'csr'])
@utils.parametrize('k', [1, 3, 5])
def track_time(graph_name, format, k):
    """Benchmark ``dgl.khop_graph``.

    Parameters
    ----------
    graph_name : str
        Graph name handed to ``utils.get_graph``.
    format : str
        Format name handed to ``utils.get_graph`` and ``graph.formats``.
    k : int
        Second argument of ``dgl.khop_graph``.

    Returns
    -------
    float
        Mean elapsed seconds per ``dgl.khop_graph`` call.
    """
    num_runs = 10

    device = utils.get_bench_device()
    graph = utils.get_graph(graph_name, format)
    graph = graph.to(device)
    graph = graph.formats([format])

    # dry run
    dgl.khop_graph(graph, k)

    # timing
    # perf_counter is monotonic and high resolution; time.time() can jump
    # when the system clock is adjusted and skew the measurement.
    # NOTE(review): if the bench device is a GPU, work launched in the loop
    # may still be in flight when the timer is read -- confirm whether a
    # device synchronize is needed here.
    start = time.perf_counter()
    for _ in range(num_runs):
        dgl.khop_graph(graph, k)
    elapsed = time.perf_counter() - start

    return elapsed / num_runs
25 changes: 25 additions & 0 deletions benchmarks/benchmarks/api/bench_knn_graph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import time
import dgl
import torch
import numpy as np

from .. import utils


@utils.benchmark('time', timeout=60)
@utils.parametrize('k', [3, 5, 10])
@utils.parametrize('size', [50, 200])
@utils.parametrize('dim', [16, 64, 128])
def track_time(size, dim, k):
    """Benchmark ``dgl.knn_graph`` on random features.

    Parameters
    ----------
    size : int
        Number of rows in the random feature matrix.
    dim : int
        Number of columns in the random feature matrix.
    k : int
        Second argument of ``dgl.knn_graph``.

    Returns
    -------
    float
        Mean elapsed seconds per ``dgl.knn_graph`` call.
    """
    num_runs = 10

    device = utils.get_bench_device()
    features = np.random.randn(size, dim)
    feat = torch.tensor(features, dtype=torch.float, device=device)

    # dry run
    dgl.knn_graph(feat, k)

    # timing
    # perf_counter is monotonic and high resolution; time.time() can jump
    # when the system clock is adjusted and skew the measurement.
    # NOTE(review): if the bench device is a GPU, work launched in the loop
    # may still be in flight when the timer is read -- confirm whether a
    # device synchronize is needed here.
    start = time.perf_counter()
    for _ in range(num_runs):
        dgl.knn_graph(feat, k)
    elapsed = time.perf_counter() - start

    return elapsed / num_runs
27 changes: 27 additions & 0 deletions benchmarks/benchmarks/api/bench_metis_partition.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import time
import dgl
import torch
import numpy as np

from .. import utils


@utils.skip_if_gpu()
@utils.benchmark('time', timeout=1200)
@utils.parametrize('graph_name', ['reddit'])
@utils.parametrize('k', [2, 4, 8])
def track_time(graph_name, k):
    """Benchmark ``dgl.transform.metis_partition``.

    Parameters
    ----------
    graph_name : str
        Dataset name handed to ``utils.process_data``; element 0 of the
        result is the graph that gets partitioned.
    k : int
        Second argument of ``dgl.transform.metis_partition``.

    Returns
    -------
    float
        Mean elapsed seconds per ``metis_partition`` call.
    """
    num_runs = 3

    # Not used below; the call is kept because its side effects (if any)
    # are not visible from this file -- TODO confirm it can be dropped.
    device = utils.get_bench_device()
    data = utils.process_data(graph_name)
    graph = data[0]

    # dry run on the 'pubmed' dataset rather than on the benchmarked graph
    dry_run_data = utils.process_data('pubmed')
    dgl.transform.metis_partition(dry_run_data[0], k)

    # timing
    # perf_counter is monotonic and high resolution; time.time() can jump
    # when the system clock is adjusted and skew the measurement.
    start = time.perf_counter()
    for _ in range(num_runs):
        dgl.transform.metis_partition(graph, k)
    elapsed = time.perf_counter() - start

    return elapsed / num_runs
35 changes: 35 additions & 0 deletions benchmarks/benchmarks/api/bench_readout.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import time
import dgl
import torch

from .. import utils


@utils.benchmark('time')
@utils.parametrize('batch_size', [4, 32, 256])
@utils.parametrize('feat_size', [32, 128, 256])
@utils.parametrize('readout_op', ['sum', 'max', 'min', 'mean'])
@utils.parametrize('type', ['edge', 'node'])
def track_time(batch_size, feat_size, readout_op, type):
    """Benchmark ``dgl.readout_nodes`` / ``dgl.readout_edges``.

    Parameters
    ----------
    batch_size : int
        Number of leading ``QM7bDataset`` graphs that are batched together.
    feat_size : int
        Width of the random feature ``'h'`` attached to nodes or edges.
    readout_op : str
        Reduction name passed as the third readout argument.
    type : str
        ``'node'`` selects ``dgl.readout_nodes``; ``'edge'`` selects
        ``dgl.readout_edges``.

    Returns
    -------
    float
        Mean elapsed seconds per readout call.

    Raises
    ------
    ValueError
        If ``type`` is neither ``'node'`` nor ``'edge'``.
    """
    num_runs = 10

    device = utils.get_bench_device()
    ds = dgl.data.QM7bDataset()
    # prepare graph
    graphs = ds[0:batch_size][0]
    g = dgl.batch(graphs).to(device)

    # Attach the feature and pick the matching readout once, so that a
    # single warm-up + timing loop serves both cases.
    if type == 'node':
        g.ndata['h'] = torch.randn((g.num_nodes(), feat_size), device=device)
        readout = dgl.readout_nodes
    elif type == 'edge':
        g.edata['h'] = torch.randn((g.num_edges(), feat_size), device=device)
        readout = dgl.readout_edges
    else:
        raise ValueError("Unknown type")

    # dry run, so that one-off first-call overhead is not part of the timing
    readout(g, 'h', readout_op)

    # timing
    # perf_counter is monotonic and high resolution; time.time() can jump
    # when the system clock is adjusted and skew the measurement.
    # NOTE(review): if the bench device is a GPU, work launched in the loop
    # may still be in flight when the timer is read -- confirm whether a
    # device synchronize is needed here.
    start = time.perf_counter()
    for _ in range(num_runs):
        readout(g, 'h', readout_op)
    elapsed = time.perf_counter() - start

    return elapsed / num_runs
27 changes: 27 additions & 0 deletions benchmarks/benchmarks/api/bench_reverse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import time
import dgl
import torch
import numpy as np

from .. import utils


@utils.benchmark('time', timeout=1200)
@utils.parametrize_cpu('graph_name', ['cora', 'livejournal', 'friendster'])
@utils.parametrize_gpu('graph_name', ['cora', 'livejournal'])
@utils.parametrize('format', ['coo', 'csc', 'csr'])
def track_time(graph_name, format):
    """Benchmark ``dgl.reverse``.

    Parameters
    ----------
    graph_name : str
        Graph name handed to ``utils.get_graph``.
    format : str
        Format name handed to ``utils.get_graph`` and ``graph.formats``.

    Returns
    -------
    float
        Mean elapsed seconds per ``dgl.reverse`` call.
    """
    num_runs = 10

    device = utils.get_bench_device()
    graph = utils.get_graph(graph_name, format)
    graph = graph.to(device)
    graph = graph.formats([format])

    # dry run
    dgl.reverse(graph)

    # timing
    # perf_counter is monotonic and high resolution; time.time() can jump
    # when the system clock is adjusted and skew the measurement.
    # NOTE(review): if the bench device is a GPU, work launched in the loop
    # may still be in flight when the timer is read -- confirm whether a
    # device synchronize is needed here.
    start = time.perf_counter()
    for _ in range(num_runs):
        dgl.reverse(graph)
    elapsed = time.perf_counter() - start

    return elapsed / num_runs
35 changes: 35 additions & 0 deletions benchmarks/benchmarks/api/bench_to_block.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import time
import dgl
import torch
import numpy as np

from .. import utils


@utils.skip_if_gpu()
@utils.benchmark('time', timeout=1200)
@utils.parametrize('graph_name', ['reddit', "ogbn-product"])
@utils.parametrize('num_seed_nodes', [32, 256, 1024, 2048])
@utils.parametrize('fanout', [5, 10, 20])
def track_time(graph_name, num_seed_nodes, fanout):
    """Benchmark ``dgl.to_block`` on neighbor-sampled subgraphs.

    Parameters
    ----------
    graph_name : str
        Dataset name handed to ``utils.process_data``; element 0 of the
        result is the graph that is sampled from.
    num_seed_nodes : int
        How many random seed node IDs are drawn for each sampled subgraph.
    fanout : int
        Third argument of ``dgl.sampling.sample_neighbors``.

    Returns
    -------
    float
        Mean elapsed seconds per ``dgl.to_block`` call.
    """
    num_runs = 10

    # Not used below; the call is kept because its side effects (if any)
    # are not visible from this file -- TODO confirm it can be dropped.
    device = utils.get_bench_device()
    data = utils.process_data(graph_name)
    graph = data[0]

    # dry run
    # NOTE(review): this warms up sample_neighbors only; the timed
    # dgl.to_block call has no warm-up of its own -- confirm this is intended.
    dgl.sampling.sample_neighbors(graph, [1, 2, 3], fanout)

    # Build every input subgraph up front so that sampling cost stays out
    # of the timed region.
    subg_list = []
    for _ in range(num_runs):
        seed_nodes = np.random.randint(
            0, graph.num_nodes(), size=num_seed_nodes)
        subg_list.append(
            dgl.sampling.sample_neighbors(graph, seed_nodes, fanout))

    # timing
    # perf_counter is monotonic and high resolution; time.time() can jump
    # when the system clock is adjusted and skew the measurement.
    start = time.perf_counter()
    for subg in subg_list:
        dgl.to_block(subg)
    elapsed = time.perf_counter() - start

    return elapsed / num_runs
26 changes: 26 additions & 0 deletions benchmarks/benchmarks/api/bench_unbatch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import time
import dgl
import torch

from .. import utils

@utils.benchmark('time')
@utils.parametrize('batch_size', [4, 32, 256, 1024])
def track_time(batch_size):
    """Benchmark ``dgl.unbatch`` on a batch of ``QM7bDataset`` graphs.

    Parameters
    ----------
    batch_size : int
        Number of leading dataset graphs that are batched together.

    Returns
    -------
    float
        Mean elapsed seconds per ``dgl.unbatch`` call.
    """
    num_warmup = 10
    num_runs = 100

    device = utils.get_bench_device()
    ds = dgl.data.QM7bDataset()
    # prepare graph
    graphs = ds[0:batch_size][0]
    bg = dgl.batch(graphs).to(device)

    # dry run
    for _ in range(num_warmup):
        dgl.unbatch(bg)

    # timing
    # perf_counter is monotonic and high resolution; time.time() can jump
    # when the system clock is adjusted and skew the measurement.
    # NOTE(review): if the bench device is a GPU, work launched in the loop
    # may still be in flight when the timer is read -- confirm whether a
    # device synchronize is needed here.
    start = time.perf_counter()
    for _ in range(num_runs):
        dgl.unbatch(bg)
    elapsed = time.perf_counter() - start

    return elapsed / num_runs
2 changes: 1 addition & 1 deletion benchmarks/benchmarks/model_acc/bench_gat.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def track_acc(data):
test_mask = g.ndata['test_mask']

in_feats = features.shape[1]
n_classes = data.num_labels
n_classes = data.num_classes

g = dgl.remove_self_loop(g)
g = dgl.add_self_loop(g)
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/benchmarks/model_acc/bench_gcn.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def track_acc(data):
test_mask = g.ndata['test_mask']

in_feats = features.shape[1]
n_classes = data.num_labels
n_classes = data.num_classes

g = dgl.remove_self_loop(g)
g = dgl.add_self_loop(g)
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/benchmarks/model_acc/bench_rgcn_ns.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def evaluate(model, embed_layer, eval_loader, node_feats):
eval_seeds = []

with th.no_grad():
for sample_data in tqdm.tqdm(eval_loader):
for sample_data in eval_loader:
th.cuda.empty_cache()
seeds, blocks = sample_data
feats = embed_layer(blocks[0].srcdata[dgl.NID],
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/benchmarks/model_acc/bench_sage.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def track_acc(data):
test_mask = g.ndata['test_mask']

in_feats = features.shape[1]
n_classes = data.num_labels
n_classes = data.num_classes

g = dgl.remove_self_loop(g)
g = dgl.add_self_loop(g)
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/benchmarks/model_acc/bench_sage_ns.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def track_acc(data):
g.ndata['features'] = g.ndata['feat']
g.ndata['labels'] = g.ndata['label']
in_feats = g.ndata['features'].shape[1]
n_classes = data.num_labels
n_classes = data.num_classes

# Create csr/coo/csc formats before launching training processes with multi-gpu.
# This avoids creating certain formats in each sub-process, which saves memory and CPU.
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/benchmarks/model_speed/bench_gat.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def track_time(data):
test_mask = g.ndata['test_mask']

in_feats = features.shape[1]
n_classes = data.num_labels
n_classes = data.num_classes

g = dgl.remove_self_loop(g)
g = dgl.add_self_loop(g)
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/benchmarks/model_speed/bench_gat_ns.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def track_time(data):
g.ndata['labels'] = g.ndata['label']
g = g.remove_self_loop().add_self_loop()
in_feats = g.ndata['features'].shape[1]
n_classes = data.num_labels
n_classes = data.num_classes

# Create csr/coo/csc formats before launching training processes with multi-gpu.
# This avoids creating certain formats in each sub-process, which saves memory and CPU.
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/benchmarks/model_speed/bench_sage.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def track_time(data):
test_mask = g.ndata['test_mask']

in_feats = features.shape[1]
n_classes = data.num_labels
n_classes = data.num_classes

g = dgl.remove_self_loop(g)
g = dgl.add_self_loop(g)
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/benchmarks/model_speed/bench_sage_ns.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def track_time(data):
g.ndata['features'] = g.ndata['feat']
g.ndata['labels'] = g.ndata['label']
in_feats = g.ndata['features'].shape[1]
n_classes = data.num_labels
n_classes = data.num_classes

# Create csr/coo/csc formats before launching training processes with multi-gpu.
# This avoids creating certain formats in each sub-process, which saves memory and CPU.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def track_time(data):
g.ndata['features'] = g.ndata['feat']
g.ndata['labels'] = g.ndata['label']
in_feats = g.ndata['features'].shape[1]
n_classes = data.num_labels
n_classes = data.num_classes

# Create csr/coo/csc formats before launching training processes with multi-gpu.
# This avoids creating certain formats in each sub-process, which saves memory and CPU.
Expand Down

0 comments on commit 0c15657

Please sign in to comment.