[Distributed] Fix partition (dmlc#1821)
* fix partition and print timing.

* fix lint.

Co-authored-by: Ubuntu <[email protected]>
zheng-da and Ubuntu authored Jul 17, 2020
1 parent df3683a commit 2073463
Showing 3 changed files with 24 additions and 2 deletions.
examples/pytorch/graphsage/load_graph.py (7 changes: 5 additions & 2 deletions)
@@ -23,15 +23,17 @@ def load_reddit():
 def load_ogb(name):
     from ogb.nodeproppred import DglNodePropPredDataset
 
+    print('load', name)
     data = DglNodePropPredDataset(name=name)
+    print('finish loading', name)
     splitted_idx = data.get_idx_split()
     graph, labels = data[0]
     labels = labels[:, 0]
 
     graph.ndata['features'] = graph.ndata['feat']
     graph.ndata['labels'] = labels
     in_feats = graph.ndata['features'].shape[1]
-    num_labels = len(th.unique(labels))
+    num_labels = len(th.unique(labels[th.logical_not(th.isnan(labels))]))
 
     # Find the node IDs in the training, validation, and test set.
     train_nid, val_nid, test_nid = splitted_idx['train'], splitted_idx['valid'], splitted_idx['test']
@@ -44,7 +46,8 @@ def load_ogb(name):
     graph.ndata['train_mask'] = train_mask
     graph.ndata['val_mask'] = val_mask
     graph.ndata['test_mask'] = test_mask
-    return graph, len(th.unique(graph.ndata['labels']))
+    print('finish constructing', name)
+    return graph, num_labels
 
 def inductive_split(g):
     """Split the graph into training graph, validation graph, and test graph by training
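A note on the num_labels change above: in some OGB node-property datasets, unlabeled nodes carry NaN labels, so counting classes over the raw label tensor is unreliable. A minimal sketch of the filtered count (illustrative only, not part of the commit; the tensor values are made up):

import torch as th

# Hypothetical labels: unlabeled nodes are NaN, labeled nodes carry class IDs.
labels = th.tensor([0., 1., float('nan'), 1., float('nan'), 2.])

# Drop the NaN entries before counting distinct classes, as the patched load_ogb does.
num_labels = len(th.unique(labels[th.logical_not(th.isnan(labels))]))
print(num_labels)  # 3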
python/dgl/distributed/partition.py (5 changes: 5 additions & 0 deletions)
@@ -79,6 +79,7 @@
 
 import json
 import os
+import time
 import numpy as np
 
 from .. import backend as F
@@ -274,6 +275,7 @@ def partition_graph(g, graph_name, num_parts, out_path, num_hops=1, part_method=
 
     # Let's calculate edge assignment.
     # TODO(zhengda) we should replace int64 with int16. int16 should be sufficient.
+    start = time.time()
     if not reshuffle:
         edge_parts = np.zeros((g.number_of_edges(),), dtype=np.int64) - 1
         num_edges = 0
@@ -294,6 +296,7 @@ def partition_graph(g, graph_name, num_parts, out_path, num_hops=1, part_method=
             ledges_list.append(local_edges)
     assert num_edges == g.number_of_edges()
     assert num_nodes == g.number_of_nodes()
+    print('Calculate edge assignment: {:.3f} seconds'.format(time.time() - start))
 
     os.makedirs(out_path, mode=0o775, exist_ok=True)
     tot_num_inner_edges = 0
@@ -317,6 +320,7 @@ def partition_graph(g, graph_name, num_parts, out_path, num_hops=1, part_method=
         node_map_val = [g.number_of_nodes()]
         edge_map_val = [g.number_of_edges()]
 
+    start = time.time()
     part_metadata = {'graph_name': graph_name,
                      'num_nodes': g.number_of_nodes(),
                      'num_edges': g.number_of_edges(),
@@ -363,6 +367,7 @@ def partition_graph(g, graph_name, num_parts, out_path, num_hops=1, part_method=
 
     with open('{}/{}.json'.format(out_path, graph_name), 'w') as outfile:
         json.dump(part_metadata, outfile, sort_keys=True, indent=4)
+    print('Save partitions: {:.3f} seconds'.format(time.time() - start))
 
     num_cuts = g.number_of_edges() - tot_num_inner_edges
     if num_parts == 1:
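The new start = time.time() markers and prints above bracket the two expensive phases of partition_graph: computing the edge assignment and writing the partitions to disk. A rough usage sketch, assuming the graph comes from the load_ogb helper shown earlier and is run from the graphsage example directory; the dataset name, partition count, and output path are placeholders:

from dgl.distributed.partition import partition_graph
from load_graph import load_ogb  # example helper shown above

g, num_labels = load_ogb('ogbn-products')
# With this commit, the call below also prints per-phase timings such as
# 'Calculate edge assignment: ... seconds' and 'Save partitions: ... seconds'.
partition_graph(g, 'ogbn-products', 4, '4part_data', num_hops=1)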
python/dgl/transform.py (14 changes: 14 additions & 0 deletions)
@@ -2,8 +2,10 @@
 
 from collections.abc import Iterable, Mapping
 from collections import defaultdict
+import time
 import numpy as np
 from scipy import sparse
+
 from ._ffi.function import _init_api
 from .graph import DGLGraph
 from .heterograph import DGLHeteroGraph
@@ -949,6 +951,7 @@ def partition_graph_with_halo(g, node_part, extra_cached_hops, reshuffle=False):
     assert len(node_part) == g.number_of_nodes()
     node_part = utils.toindex(node_part)
     if reshuffle:
+        start = time.time()
         node_part = node_part.tousertensor()
         sorted_part, new2old_map = F.sort_1d(node_part)
         new_node_ids = np.zeros((g.number_of_nodes(),), dtype=np.int64)
@@ -960,10 +963,14 @@ def partition_graph_with_halo(g, node_part, extra_cached_hops, reshuffle=False):
         orig_eids = _CAPI_DGLReassignEdges(g._graph, True)
         orig_eids = utils.toindex(orig_eids)
         g.edata['orig_id'] = orig_eids.tousertensor()
+        print('Reshuffle nodes and edges: {:.3f} seconds'.format(time.time() - start))
 
+    start = time.time()
     subgs = _CAPI_DGLPartitionWithHalo(g._graph, node_part.todgltensor(), extra_cached_hops)
+    print('Split the graph: {:.3f} seconds'.format(time.time() - start))
     subg_dict = {}
     node_part = node_part.tousertensor()
+    start = time.time()
     for i, subg in enumerate(subgs):
         inner_node = _get_halo_subgraph_inner_node(subg)
         subg = g._create_subgraph(subg, subg.induced_nodes, subg.induced_edges)
@@ -986,6 +993,7 @@ def partition_graph_with_halo(g, node_part, extra_cached_hops, reshuffle=False):
         inner_edge = F.ones((subg.number_of_edges(),), F.int64, F.cpu())
         subg.edata['inner_edge'] = inner_edge
         subg_dict[i] = subg
+    print('Construct subgraphs: {:.3f} seconds'.format(time.time() - start))
     return subg_dict
 
 def metis_partition_assignment(g, k, balance_ntypes=None, balance_edges=False):
@@ -1021,7 +1029,9 @@ def metis_partition_assignment(g, k, balance_ntypes=None, balance_edges=False):
     '''
     # METIS works only on symmetric graphs.
     # METIS runs on the symmetric graph to generate the node assignment to partitions.
+    start = time.time()
     sym_g = to_bidirected_stale(g, readonly=True)
+    print('Convert a graph into a bidirected graph: {:.3f} seconds'.format(time.time() - start))
     vwgt = []
     # To balance the node types in each partition, we can take advantage of the vertex weights
     # in Metis. When vertex weights are provided, Metis will try to generate partitions with
@@ -1033,6 +1043,7 @@ def metis_partition_assignment(g, k, balance_ntypes=None, balance_edges=False):
     # if a node belongs to the first node type, its weight is set to 1; otherwise, 0.
     # Similarly, we set the second weight for the second node type and so on. The number
     # of weights is the same as the number of node types.
+    start = time.time()
     if balance_ntypes is not None:
         assert len(balance_ntypes) == g.number_of_nodes(), \
             "The length of balance_ntypes should be equal to #nodes in the graph"
@@ -1051,11 +1062,14 @@ def metis_partition_assignment(g, k, balance_ntypes=None, balance_edges=False):
         shape = (np.prod(F.shape(vwgt),),)
         vwgt = F.reshape(vwgt, shape)
         vwgt = F.zerocopy_to_dgl_ndarray(vwgt)
+        print('Construct multi-constraint weights: {:.3f} seconds'.format(time.time() - start))
     else:
         vwgt = F.zeros((0,), F.int64, F.cpu())
         vwgt = F.zerocopy_to_dgl_ndarray(vwgt)
 
+    start = time.time()
     node_part = _CAPI_DGLMetisPartition(sym_g._graph, k, vwgt)
+    print('Metis partitioning: {:.3f} seconds'.format(time.time() - start))
     if len(node_part) == 0:
         return None
     else:
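The multi-constraint weights timed in the last two hunks are described by the comments above: one indicator weight per node type, set to 1 if the node has that type and 0 otherwise, flattened into a single int64 array for METIS. A small standalone sketch of that construction (illustrative only; it uses plain torch instead of the DGL backend wrappers, and the node-type vector is made up):

import torch as th

# Hypothetical per-node type IDs for 6 nodes and 3 node types.
balance_ntypes = th.tensor([0, 0, 1, 2, 1, 0])
num_ntypes = int(balance_ntypes.max()) + 1

# One constraint per node type: weight is 1 if the node has that type, else 0.
vwgt = th.stack([(balance_ntypes == ntype).long() for ntype in range(num_ntypes)], dim=1)

# Flatten to a 1-D int64 array with one entry per (node, node type) pair,
# mirroring the F.reshape step in the diff above.
vwgt = vwgt.reshape((vwgt.numel(),))
print(vwgt)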
