Skip to content

Commit

Permalink
[Feature] Range partition (dmlc#1522)
Browse files Browse the repository at this point in the history
* add reorder immutable graph.

* add python API.

* add reorder for csr.

* remove gk version.

* fix

* add cpp test.

* bug fixes

* fix tests.

* fix bugs and add check

* fix test.

* add omp.

* add comments.

* add coo reorder.

* fix a bug.

* handle reorder for different graph structures.

* fix lint.

* fix.

* add original ids.

* reshuffle nodes before metis partition.

* inner nodes are in contiguous Id range.

* reshuffle nodes/edges when partitioning.

* load partition return graph partition book.

* use inner_node/inner_edges

* add and test range partition book.

* count inner_edge correctly.

* fix lint.

* fix lint.

* fix lint.

* fix errors.

* fix errors.

* fix for TF.

* fix.

* fix.

* change docstring.

* support logical and.

* add comments.

* avoid copy.

* fix

* update docstring.

* fix a bug.

* add range search.

* fix

* fix a bug.

* add more tests.

* load graph partition book.

* support shared memory for range partition book.

* fix a bug.

* fix.

* fix lint.

* remove check

* fix test.

* remove num_nodes and num_edges

* fix lint.

* fix graph partition book.

* address comments.

* use makedirs.

* fix compile

Co-authored-by: xiang song(charlie.song) <[email protected]>
Co-authored-by: Chao Ma <[email protected]>
  • Loading branch information
3 people authored Jun 15, 2020
1 parent 3e72c53 commit 41349dc
Show file tree
Hide file tree
Showing 21 changed files with 891 additions and 208 deletions.
16 changes: 16 additions & 0 deletions include/dgl/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,14 @@ bool CSRHasDuplicate(CSRMatrix csr);
*/
void CSRSort_(CSRMatrix* csr);

/*!
 * \brief Reorder the rows and columns according to the new row and column order.
* \param csr The input csr matrix.
* \param new_row_ids the new row Ids (the index is the old row Id)
* \param new_col_ids the new column Ids (the index is the old col Id).
*/
CSRMatrix CSRReorder(CSRMatrix csr, runtime::NDArray new_row_ids, runtime::NDArray new_col_ids);

/*!
* \brief Remove entries from CSR matrix by entry indices (data indices)
* \return A new CSR matrix as well as a mapping from the new CSR entries to the old CSR
Expand Down Expand Up @@ -778,6 +786,14 @@ COOMatrix COOSort(COOMatrix mat, bool sort_column = false);
*/
COOMatrix COORemove(COOMatrix coo, IdArray entries);

/*!
 * \brief Reorder the rows and columns according to the new row and column order.
 * \param coo The input coo matrix.
* \param new_row_ids the new row Ids (the index is the old row Id)
* \param new_col_ids the new column Ids (the index is the old col Id).
*/
COOMatrix COOReorder(COOMatrix coo, runtime::NDArray new_row_ids, runtime::NDArray new_col_ids);

/*!
* \brief Randomly select a fixed number of non-zero entries along each given row independently.
*
Expand Down
8 changes: 8 additions & 0 deletions include/dgl/graph_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,14 @@ class GraphOp {
*/
static HaloSubgraph GetSubgraphWithHalo(GraphPtr graph, IdArray nodes, int num_hops);

/*!
* \brief Reorder the nodes in the immutable graph.
 * \param ig The input immutable graph.
 * \param new_order The node Ids in the new graph. The index in `new_order` is the old node Id.
* \return the graph with reordered node Ids
*/
static GraphPtr ReorderImmutableGraph(ImmutableGraphPtr ig, IdArray new_order);

/*!
* \brief Partition a graph with Metis.
* The partitioning algorithm assigns each vertex to a partition.
Expand Down
3 changes: 3 additions & 0 deletions python/dgl/backend/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -982,6 +982,9 @@ def logical_not(input):
"""
pass

def logical_and(input1, input2):
    """Compute the element-wise logical AND of two input tensors.

    Abstract backend interface; each backend (pytorch/mxnet/tensorflow)
    provides the concrete implementation.

    Parameters
    ----------
    input1 : Tensor
        The first input tensor.
    input2 : Tensor
        The second input tensor.

    Returns
    -------
    Tensor
        The element-wise logical AND of the two inputs.
    """
    pass

def clone(input):
"""Return a clone of the input tensor.
Expand Down
3 changes: 3 additions & 0 deletions python/dgl/backend/mxnet/tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,9 @@ def equal(x, y):
def logical_not(input):
return nd.logical_not(input)

def logical_and(input1, input2):
    # Element-wise logical AND via MXNet's ndarray op; implements the
    # abstract backend API `logical_and`.
    return nd.logical_and(input1, input2)

def clone(input):
return input.copy()

Expand Down
3 changes: 3 additions & 0 deletions python/dgl/backend/pytorch/tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,9 @@ def equal(x, y):
def logical_not(input):
return ~input

def logical_and(input1, input2):
    """Return the element-wise logical AND of the two input tensors.

    Implements the abstract backend API `logical_and` for PyTorch using
    the `&` operator (bitwise AND, which is logical AND on bool tensors).
    """
    conjunction = input1 & input2
    return conjunction

def clone(input):
return input.clone()

Expand Down
3 changes: 3 additions & 0 deletions python/dgl/backend/tensorflow/tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,9 @@ def equal(x, y):
def logical_not(input):
return ~input

def logical_and(input1, input2):
    # Element-wise logical AND via TensorFlow's boolean op; implements the
    # abstract backend API `logical_and`. Note: unlike the pytorch backend's
    # `&`, tf.math.logical_and requires bool tensors.
    return tf.math.logical_and(input1, input2)

def clone(input):
# TF tensor is always immutable so returning the input is safe.
return input
Expand Down
33 changes: 18 additions & 15 deletions python/dgl/distributed/dist_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from .._ffi.ndarray import empty_shared_mem
from ..frame import infer_scheme
from .partition import load_partition
from .graph_partition_book import GraphPartitionBook, PartitionPolicy, get_shared_mem_partition_book
from .graph_partition_book import PartitionPolicy, get_shared_mem_partition_book
from .. import utils
from .shared_mem_utils import _to_shared_mem, _get_ndata_path, _get_edata_path, DTYPE_DICT
from .rpc_client import connect_to_server
Expand All @@ -25,17 +25,16 @@ def _copy_graph_to_shared_mem(g, graph_name):
new_g = DGLGraph(gidx)
# We should share the node/edge data to the client explicitly instead of putting them
# in the KVStore because some of the node/edge data may be duplicated.
local_node_path = _get_ndata_path(graph_name, 'local_node')
new_g.ndata['local_node'] = _to_shared_mem(g.ndata['local_node'],
local_node_path)
local_edge_path = _get_edata_path(graph_name, 'local_edge')
new_g.edata['local_edge'] = _to_shared_mem(g.edata['local_edge'], local_edge_path)
local_node_path = _get_ndata_path(graph_name, 'inner_node')
new_g.ndata['inner_node'] = _to_shared_mem(g.ndata['inner_node'], local_node_path)
local_edge_path = _get_edata_path(graph_name, 'inner_edge')
new_g.edata['inner_edge'] = _to_shared_mem(g.edata['inner_edge'], local_edge_path)
new_g.ndata[NID] = _to_shared_mem(g.ndata[NID], _get_ndata_path(graph_name, NID))
new_g.edata[EID] = _to_shared_mem(g.edata[EID], _get_edata_path(graph_name, EID))
return new_g

FIELD_DICT = {'local_node': F.int64,
'local_edge': F.int64,
FIELD_DICT = {'inner_node': F.int64,
'inner_edge': F.int64,
NID: F.int64,
EID: F.int64}

Expand Down Expand Up @@ -99,8 +98,8 @@ def _get_graph_from_shared_mem(graph_name):
return gidx

g = DGLGraph(gidx)
g.ndata['local_node'] = _get_shared_mem_ndata(g, graph_name, 'local_node')
g.edata['local_edge'] = _get_shared_mem_edata(g, graph_name, 'local_edge')
g.ndata['inner_node'] = _get_shared_mem_ndata(g, graph_name, 'inner_node')
g.edata['inner_edge'] = _get_shared_mem_edata(g, graph_name, 'inner_edge')
g.ndata[NID] = _get_shared_mem_ndata(g, graph_name, NID)
g.edata[EID] = _get_shared_mem_edata(g, graph_name, EID)
return g
Expand Down Expand Up @@ -271,12 +270,10 @@ def __init__(self, server_id, ip_config, num_clients, graph_name, conf_file):
self.ip_config = ip_config

# Load graph partition data.
self.client_g, node_feats, edge_feats, self.meta = load_partition(conf_file, server_id)
_, _, node_map, edge_map, num_partitions = self.meta
self.client_g, node_feats, edge_feats, self.gpb = load_partition(conf_file, server_id)
self.client_g = _copy_graph_to_shared_mem(self.client_g, graph_name)

# Init kvstore.
self.gpb = GraphPartitionBook(server_id, num_partitions, node_map, edge_map, self.client_g)
self.gpb.shared_memory(graph_name)
self.add_part_policy(PartitionPolicy('node', server_id, self.gpb))
self.add_part_policy(PartitionPolicy('edge', server_id, self.gpb))
Expand Down Expand Up @@ -332,6 +329,12 @@ def __init__(self, ip_config, graph_name):
self._default_init_ndata = _default_init_data
self._default_init_edata = _default_init_data

self._num_nodes = 0
self._num_edges = 0
for part_md in self._gpb.metadata():
self._num_nodes += int(part_md['num_nodes'])
self._num_edges += int(part_md['num_edges'])


def init_ndata(self, ndata_name, shape, dtype):
'''Initialize node data
Expand Down Expand Up @@ -425,11 +428,11 @@ def edata(self):

def number_of_nodes(self):
"""Return the number of nodes"""
return self._gpb.num_nodes()
return self._num_nodes

def number_of_edges(self):
"""Return the number of edges"""
return self._gpb.num_edges()
return self._num_edges

def node_attr_schemes(self):
"""Return the node feature and embedding schemes."""
Expand Down
Loading

0 comments on commit 41349dc

Please sign in to comment.