
Commit

[Distributed] add distributed in-degree and out-degree. (dmlc#2918)
* add distributed in-degree and out-degree.

* update comments.

* fix a bug.

* add tests.

* add tests.

* fix a bug.

* fix docstring.

* update doc.

* fix

* fix.

Co-authored-by: Zheng <[email protected]>
Co-authored-by: xiang song(charlie.song) <[email protected]>
3 people authored May 18, 2021
1 parent 50492d5 commit 6e7f19f
Showing 9 changed files with 414 additions and 34 deletions.
docs/source/api/python/dgl.distributed.rst: 2 changes (1 addition & 1 deletion)
@@ -17,7 +17,7 @@ Distributed Graph
-----------------

.. autoclass:: DistGraph
:members: ndata, edata, idtype, device, ntypes, etypes, number_of_nodes, number_of_edges, node_attr_schemes, edge_attr_schemes, rank, find_edges, get_partition_book, barrier, local_partition, num_nodes, num_edges, get_node_partition_policy, get_edge_partition_policy, get_etype_id, get_ntype_id, nodes, edges
:members: ndata, edata, idtype, device, ntypes, etypes, number_of_nodes, number_of_edges, node_attr_schemes, edge_attr_schemes, rank, find_edges, get_partition_book, barrier, local_partition, num_nodes, num_edges, get_node_partition_policy, get_edge_partition_policy, get_etype_id, get_ntype_id, nodes, edges, out_degrees, in_degrees

Distributed Tensor
------------------
python/dgl/distributed/__init__.py: 2 changes (1 addition & 1 deletion)
@@ -29,4 +29,4 @@
from .kvstore import KVServer, KVClient
from .server_state import ServerState
from .dist_dataloader import DistDataLoader
from .graph_services import sample_neighbors, in_subgraph, find_edges
from .graph_services import sample_neighbors, in_subgraph
python/dgl/distributed/dist_graph.py: 102 changes (101 additions & 1 deletion)
@@ -9,7 +9,7 @@
from ..heterograph import DGLHeteroGraph
from .. import heterograph_index
from .. import backend as F
from ..base import NID, EID, NTYPE, ETYPE
from ..base import NID, EID, NTYPE, ETYPE, ALL, is_all
from .kvstore import KVServer, get_kvstore
from .._ffi.ndarray import empty_shared_mem
from ..frame import infer_scheme
@@ -23,6 +23,8 @@
from .server_state import ServerState
from .rpc_server import start_server
from .graph_services import find_edges as dist_find_edges
from .graph_services import out_degrees as dist_out_degrees
from .graph_services import in_degrees as dist_in_degrees
from .dist_tensor import DistTensor

INIT_GRAPH = 800001
@@ -745,6 +747,104 @@ def num_edges(self, etype=None):
return sum([self._gpb._num_edges(etype) for etype in self.etypes])
return self._gpb._num_edges(etype)

def out_degrees(self, u=ALL):
"""Return the out-degree(s) of the given nodes.
It computes the out-degree(s).
It does not support heterogeneous graphs yet.
Parameters
----------
u : node IDs
The node IDs. The allowed formats are:
* ``int``: A single node.
* Int Tensor: Each element is a node ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is a node ID.
If not given, return the in-degrees of all the nodes.
Returns
-------
int or Tensor
The out-degree(s) of the node(s) in a Tensor. The i-th element is the out-degree
of the i-th input node. If :attr:`v` is an ``int``, return an ``int`` too.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Query for all nodes.
>>> g.out_degrees()
tensor([2, 2, 0, 0])
Query for nodes 1 and 2.
>>> g.out_degrees(torch.tensor([1, 2]))
tensor([2, 0])
See Also
--------
in_degrees
"""
if is_all(u):
u = F.arange(0, self.number_of_nodes())
return dist_out_degrees(self, u)

def in_degrees(self, v=ALL):
"""Return the in-degree(s) of the given nodes.
It computes the in-degree(s).
It does not support heterogeneous graphs yet.
Parameters
----------
v : node IDs
The node IDs. The allowed formats are:
* ``int``: A single node.
* Int Tensor: Each element is a node ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is a node ID.
If not given, return the in-degrees of all the nodes.
Returns
-------
int or Tensor
The in-degree(s) of the node(s) in a Tensor. The i-th element is the in-degree
of the i-th input node. If :attr:`v` is an ``int``, return an ``int`` too.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Query for all nodes.
>>> g.in_degrees()
tensor([0, 2, 1, 1])
Query for nodes 1 and 2.
>>> g.in_degrees(torch.tensor([1, 2]))
tensor([2, 1])
See Also
--------
out_degrees
"""
if is_all(v):
v = F.arange(0, self.number_of_nodes())
return dist_in_degrees(self, v)

def node_attr_schemes(self):
"""Return the node feature schemes.
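Taken together, the new methods give DistGraph the same degree API as DGLGraph. Below is a minimal usage sketch, assuming an already launched DGL distributed job; 'ip_config.txt' and 'graph_name' are placeholders for a real IP config file and a partitioned graph name, not values from this commit.

import dgl
import torch

# Assumed setup: a running distributed job with a valid IP config and a
# partitioned graph; both names below are placeholders.
dgl.distributed.initialize('ip_config.txt')
g = dgl.distributed.DistGraph('graph_name')

# Degrees of all nodes (the default u=ALL expands to arange(num_nodes)).
print(g.out_degrees())
print(g.in_degrees())

# Degrees of selected nodes; the i-th result matches the i-th input node.
print(g.out_degrees(torch.tensor([1, 2])))
print(g.in_degrees(torch.tensor([1, 2])))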
python/dgl/distributed/graph_services.py: 150 changes (148 additions & 2 deletions)
@@ -15,6 +15,8 @@
SAMPLING_SERVICE_ID = 6657
INSUBGRAPH_SERVICE_ID = 6658
EDGES_SERVICE_ID = 6659
OUTDEGREE_SERVICE_ID = 6660
INDEGREE_SERVICE_ID = 6661

class SubgraphResponse(Response):
"""The response for sampling and in_subgraph"""
@@ -76,6 +78,20 @@ def _find_edges(local_g, partition_book, seed_edges):
global_dst = global_nid_mapping[local_dst]
return global_src, global_dst

def _in_degrees(local_g, partition_book, n):
"""Get in-degree of the nodes in the local partition.
"""
local_nids = partition_book.nid2localnid(n, partition_book.partid)
local_nids = F.astype(local_nids, local_g.idtype)
return local_g.in_degrees(local_nids)

def _out_degrees(local_g, partition_book, n):
"""Get out-degree of the nodes in the local partition.
"""
local_nids = partition_book.nid2localnid(n, partition_book.partid)
local_nids = F.astype(local_nids, local_g.idtype)
return local_g.out_degrees(local_nids)

def _in_subgraph(local_g, partition_book, seed_nodes):
""" Get in subgraph from local partition.
@@ -140,6 +156,72 @@ def process_request(self, server_state):

return FindEdgeResponse(global_src, global_dst, self.order_id)

class InDegreeRequest(Request):
"""In-degree Request"""

def __init__(self, n, order_id):
self.n = n
self.order_id = order_id

def __setstate__(self, state):
self.n, self.order_id = state

def __getstate__(self):
return self.n, self.order_id

def process_request(self, server_state):
local_g = server_state.graph
partition_book = server_state.partition_book
deg = _in_degrees(local_g, partition_book, self.n)

return InDegreeResponse(deg, self.order_id)

class InDegreeResponse(Response):
"""The response for in-degree"""

def __init__(self, deg, order_id):
self.val = deg
self.order_id = order_id

def __setstate__(self, state):
self.val, self.order_id = state

def __getstate__(self):
return self.val, self.order_id

class OutDegreeRequest(Request):
"""Out-degree Request"""

def __init__(self, n, order_id):
self.n = n
self.order_id = order_id

def __setstate__(self, state):
self.n, self.order_id = state

def __getstate__(self):
return self.n, self.order_id

def process_request(self, server_state):
local_g = server_state.graph
partition_book = server_state.partition_book
deg = _out_degrees(local_g, partition_book, self.n)

return OutDegreeResponse(deg, self.order_id)

class OutDegreeResponse(Response):
"""The response for out-degree"""

def __init__(self, deg, order_id):
self.val = deg
self.order_id = order_id

def __setstate__(self, state):
self.val, self.order_id = state

def __getstate__(self):
return self.val, self.order_id

class InSubgraphRequest(Request):
"""InSubgraph Request"""

@@ -410,11 +492,11 @@ def find_edges(g, edge_ids):
tensor
The destination node ID array.
"""
def issue_remove_req(edge_ids, order_id):
def issue_remote_req(edge_ids, order_id):
return EdgesRequest(edge_ids, order_id)
def local_access(local_g, partition_book, edge_ids):
return _find_edges(local_g, partition_book, edge_ids)
return _distributed_edge_access(g, edge_ids, issue_remove_req, local_access)
return _distributed_edge_access(g, edge_ids, issue_remote_req, local_access)

def in_subgraph(g, nodes):
"""Return the subgraph induced on the inbound edges of the given nodes.
@@ -452,6 +534,70 @@ def local_access(local_g, partition_book, local_nids):
return _in_subgraph(local_g, partition_book, local_nids)
return _distributed_access(g, nodes, issue_remote_req, local_access)

def _distributed_get_node_property(g, n, issue_remote_req, local_access):
req_list = []
partition_book = g.get_partition_book()
n = toindex(n).tousertensor()
partition_id = partition_book.nid2partid(n)
local_nids = None
reorder_idx = []
for pid in range(partition_book.num_partitions()):
mask = (partition_id == pid)
nid = F.boolean_mask(n, mask)
reorder_idx.append(F.nonzero_1d(mask))
if pid == partition_book.partid and g.local_partition is not None:
assert local_nids is None
local_nids = nid
elif len(nid) != 0:
req = issue_remote_req(nid, pid)
req_list.append((pid, req))

# send requests to the remote machines.
msgseq2pos = None
if len(req_list) > 0:
msgseq2pos = send_requests_to_machine(req_list)

# handle nodes in the local partition.
vals = None
if local_nids is not None:
local_vals = local_access(g.local_partition, partition_book, local_nids)
shape = list(F.shape(local_vals))
shape[0] = len(n)
vals = F.zeros(shape, F.dtype(local_vals), F.cpu())
vals = F.scatter_row(vals, reorder_idx[partition_book.partid], local_vals)

# receive responses from remote machines.
if msgseq2pos is not None:
results = recv_responses(msgseq2pos)
if len(results) > 0 and vals is None:
shape = list(F.shape(results[0].val))
shape[0] = len(n)
vals = F.zeros(shape, F.dtype(results[0].val), F.cpu())
for result in results:
val = result.val
vals = F.scatter_row(vals, reorder_idx[result.order_id], val)
return vals

def in_degrees(g, v):
'''Get the in-degrees of the given nodes from the distributed graph.
'''
def issue_remote_req(v, order_id):
return InDegreeRequest(v, order_id)
def local_access(local_g, partition_book, v):
return _in_degrees(local_g, partition_book, v)
return _distributed_get_node_property(g, v, issue_remote_req, local_access)

def out_degrees(g, u):
'''Get the out-degrees of the given nodes from the distributed graph.
'''
def issue_remote_req(u, order_id):
return OutDegreeRequest(u, order_id)
def local_access(local_g, partition_book, u):
return _out_degrees(local_g, partition_book, u)
return _distributed_get_node_property(g, u, issue_remote_req, local_access)

register_service(SAMPLING_SERVICE_ID, SamplingRequest, SubgraphResponse)
register_service(EDGES_SERVICE_ID, EdgesRequest, FindEdgeResponse)
register_service(INSUBGRAPH_SERVICE_ID, InSubgraphRequest, SubgraphResponse)
register_service(OUTDEGREE_SERVICE_ID, OutDegreeRequest, OutDegreeResponse)
register_service(INDEGREE_SERVICE_ID, InDegreeRequest, InDegreeResponse)
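
The helper _distributed_get_node_property above follows the same pattern as _distributed_edge_access: split the query by owning partition, answer the local part directly, send one request per remote partition, and scatter every partial result back into the caller's input order via reorder_idx. Here is a minimal, self-contained sketch of that reassembly step with made-up data; plain PyTorch stands in for the F backend.

import torch

n = torch.tensor([7, 3, 9, 0])             # node IDs in the caller's order
partition_id = torch.tensor([1, 0, 1, 0])  # which partition owns each node

vals = torch.zeros(len(n), dtype=torch.int64)
for pid in range(2):
    mask = partition_id == pid
    reorder_idx = torch.nonzero(mask, as_tuple=True)[0]
    # Stand-in for the per-partition lookup (local_access or a remote
    # request); here it just returns the partition ID for each queried node.
    local_vals = torch.full((int(mask.sum()),), pid, dtype=torch.int64)
    vals[reorder_idx] = local_vals          # plays the role of F.scatter_row

print(vals)  # tensor([1, 0, 1, 0]): results are back in input order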
python/dgl/partition.py: 37 changes (26 additions & 11 deletions)
@@ -171,39 +171,54 @@ def partition_graph_with_halo(g, node_part, extra_cached_hops, reshuffle=False):
node_part = node_part.tousertensor()
start = time.time()

# This function determines whether an edge belongs to a partition.
# An edge is assigned to a partition based on its destination node: the edge belongs to
# whichever partition its destination node is assigned to.
def get_inner_edge(subg, inner_node):
inner_edge = F.zeros((subg.number_of_edges(),), F.int8, F.cpu())
inner_nids = F.nonzero_1d(inner_node)
# TODO(zhengda) we need to fix utils.toindex() to avoid the dtype cast below.
inner_nids = F.astype(inner_nids, F.int64)
inner_eids = subg.in_edges(inner_nids, form='eid')
inner_edge = F.scatter_row(inner_edge, inner_eids,
F.ones((len(inner_eids),), F.dtype(inner_edge), F.cpu()))
return inner_edge

# This creates a subgraph from the subgraphs returned by the C API above.
def create_subgraph(subg, induced_nodes, induced_edges):
def create_subgraph(subg, induced_nodes, induced_edges, inner_node):
subg1 = DGLHeteroGraph(gidx=subg.graph, ntypes=['_N'], etypes=['_E'])
# If IDs are shuffled, we should shuffle the edges as well. This helps us collect
# edge data from the distributed graph after training.
if reshuffle:
sorted_edges, index = F.sort_1d(induced_edges[0])
# When we shuffle edges, we need to make sure that the inner edges are assigned
# contiguous edge IDs starting from 0. In other words, we want these edge IDs to be
# placed at the front of the edge list. To ensure that, we add a large offset to the
# IDs of the outer edges, so sorting yields the order we want.
max_eid = F.max(induced_edges[0], 0) + 1
inner_edge = get_inner_edge(subg1, inner_node)
eid = F.astype(induced_edges[0], F.int64) + max_eid * F.astype(inner_edge == 0, F.int64)

_, index = F.sort_1d(eid)
subg1 = edge_subgraph(subg1, index, preserve_nodes=True)
subg1.ndata[NID] = induced_nodes[0]
subg1.edata[EID] = sorted_edges
subg1.edata[EID] = F.gather_row(induced_edges[0], index)
else:
subg1.ndata[NID] = induced_nodes[0]
subg1.edata[EID] = induced_edges[0]
return subg1

for i, subg in enumerate(subgs):
inner_node = _get_halo_heterosubgraph_inner_node(subg)
subg = create_subgraph(subg, subg.induced_nodes, subg.induced_edges)
inner_node = F.zerocopy_from_dlpack(inner_node.to_dlpack())
subg = create_subgraph(subg, subg.induced_nodes, subg.induced_edges, inner_node)
subg.ndata['inner_node'] = inner_node
subg.ndata['part_id'] = F.gather_row(node_part, subg.ndata[NID])
if reshuffle:
subg.ndata['orig_id'] = F.gather_row(orig_nids, subg.ndata[NID])
subg.edata['orig_id'] = F.gather_row(orig_eids, subg.edata[EID])

if extra_cached_hops >= 1:
inner_edge = F.zeros((subg.number_of_edges(),), F.int8, F.cpu())
inner_nids = F.nonzero_1d(subg.ndata['inner_node'])
# TODO(zhengda) we need to fix utils.toindex() to avoid the dtype cast below.
inner_nids = F.astype(inner_nids, F.int64)
inner_eids = subg.in_edges(inner_nids, form='eid')
inner_edge = F.scatter_row(inner_edge, inner_eids,
F.ones((len(inner_eids),), F.dtype(inner_edge), F.cpu()))
inner_edge = get_inner_edge(subg, inner_node)
else:
inner_edge = F.ones((subg.number_of_edges(),), F.int8, F.cpu())
subg.edata['inner_edge'] = inner_edge
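The ID-offset trick in create_subgraph is easy to see on toy data: adding max_eid to the IDs of the outer edges pushes them past every inner edge, so a single sort puts the inner edges first. A small worked example with made-up tensors, plain PyTorch standing in for the F backend:

import torch

induced_eids = torch.tensor([5, 2, 8, 1])  # induced edge IDs of a toy subgraph
inner_edge   = torch.tensor([1, 0, 1, 1])  # 1 = inner edge, 0 = outer edge
max_eid = induced_eids.max() + 1           # 9

# Outer edges (inner_edge == 0) get shifted past the largest induced ID.
eid = induced_eids + max_eid * (inner_edge == 0).long()
# eid is [5, 11, 8, 1]: the outer edge with ID 2 became 2 + 9 = 11.
_, index = torch.sort(eid)
print(index)  # tensor([3, 0, 2, 1]): inner edges (IDs 1, 5, 8) come first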
(The remaining 4 changed files are not shown here.)
