[API] Graph traversal (dmlc#103)

* bfs, dfs and topological traversal * dfs and test cases * Conflicts: python/dgl/graph.py src/graph/graph.cc src/graph/graph_op.cc * documentation * requested changes * Conflicts: Jenkinsfile examples/pytorch/gcn/gcn.py examples/pytorch/gcn/gcn_spmv.py python/dgl/graph.py python/dgl/graph_index.py src/graph/graph.cc src/graph/graph_op.cc * Conflicts: Jenkinsfile python/dgl/graph_index.py * fix lint errors * fix lint errors * fix lint errors * fix test cases * requested changes * traversal interface * [Bug] fixed a typo that caused syntax error (dmlc#120) * WIP * bfs nodes generator works * topological traversal * WIP: dfs_edges * dfs edges * dfs labeled edges * utest for traversal * fix lint * fix utest * code clean * changes as requested
SunYa0 · Nov 8, 2018 · 2319167 · 2319167
1 parent 79cecce
commit 2319167
Show file tree

Hide file tree

Showing 15 changed files with 776 additions and 47 deletions.
diff --git a/examples/pytorch/tree_lstm/tree_lstm.py b/examples/pytorch/tree_lstm/tree_lstm.py
@@ -108,9 +108,7 @@ def forward(self, graph, zero_initializer, h=None, c=None, iterator=None, train=
         g.set_n_repr({'x' : x, 'h' : h, 'c' : c, 'h_tild' : h_tild, 'c_tild' : c_tild})
         # TODO(minjie): potential bottleneck
         if iterator is None:
-            for frontier in topological_traverse(g):
-                #print('frontier', frontier)
-                g.pull(frontier)
+            g.propagate('topo')
         else:
             for frontier in iterator:
                 g.pull(frontier)

diff --git a/include/dgl/graph.h b/include/dgl/graph.h
@@ -8,6 +8,8 @@
 
 #include <vector>
 #include <cstdint>
+#include <utility>
+#include <tuple>
 #include "runtime/ndarray.h"
 
 namespace dgl {
@@ -16,6 +18,7 @@ typedef uint64_t dgl_id_t;
 typedef tvm::runtime::NDArray IdArray;
 typedef tvm::runtime::NDArray DegreeArray;
 typedef tvm::runtime::NDArray BoolArray;
+typedef tvm::runtime::NDArray IntArray;
 
 class Graph;
 class GraphOp;
@@ -301,6 +304,42 @@ class Graph {
    */
   Graph Reverse() const;
 
+  /*!
+   * \brief Return the successor vector
+   * \param vid The vertex id.
+   * \return the successor vector
+   */
+  const std::vector<dgl_id_t>& SuccVec(dgl_id_t vid) const {
+    return adjlist_[vid].succ;
+  }
+
+  /*!
+   * \brief Return the out edge id vector
+   * \param vid The vertex id.
+   * \return the out edge id vector
+   */
+  const std::vector<dgl_id_t>& OutEdgeVec(dgl_id_t vid) const {
+    return adjlist_[vid].edge_id;
+  }
+
+  /*!
+   * \brief Return the predecessor vector
+   * \param vid The vertex id.
+   * \return the predecessor vector
+   */
+  const std::vector<dgl_id_t>& PredVec(dgl_id_t vid) const {
+    return reverse_adjlist_[vid].succ;
+  }
+
+  /*!
+   * \brief Return the in edge id vector
+   * \param vid The vertex id.
+   * \return the in edge id vector
+   */
+  const std::vector<dgl_id_t>& InEdgeVec(dgl_id_t vid) const {
+    return reverse_adjlist_[vid].edge_id;
+  }
+
  protected:
   friend class GraphOp;
   /*! \brief Internal edge list type */

diff --git a/include/dgl/graph_op.h b/include/dgl/graph_op.h
@@ -25,6 +25,7 @@ class GraphOp {
    * \return the line graph
    */
   static Graph LineGraph(const Graph* graph, bool backtracking);
+
   /*!
    * \brief Return a disjoint union of the input graphs.
    *

diff --git a/python/dgl/__init__.py b/python/dgl/__init__.py
@@ -11,4 +11,5 @@
 from .batched_graph import *
 from .graph import DGLGraph
 from .subgraph import DGLSubGraph
+from .traversal import *
 from .udf import NodeBatch, EdgeBatch
diff --git a/python/dgl/graph.py b/python/dgl/graph.py
@@ -1,5 +1,4 @@
-"""Base graph class specialized for neural networks on graphs.
-"""
+"""Base graph class specialized for neural networks on graphs."""
 from __future__ import absolute_import
 
 import networkx as nx
@@ -336,7 +335,7 @@ def find_edges(self, eid):
         tensor, tensor
         The source and destination node IDs.
         """
-        eid = utils.toindex(u)
+        eid = utils.toindex(eid)
         src, dst, _ = self._graph.find_edges(eid)
         return src.tousertensor(), dst.tousertensor()
 
@@ -1207,40 +1206,60 @@ def update_all(self,
             self.send(ALL, message_func)
             self.recv(ALL, reduce_func, apply_node_func)
 
-    def propagate(self,
-                  traverser='topo',
-                  message_func="default",
-                  reduce_func="default",
-                  apply_node_func="default",
-                  **kwargs):
-        """Propagate messages and update nodes using graph traversal.
+    def prop_nodes(self,
+                   nodes_generator,
+                   message_func="default",
+                   reduce_func="default",
+                   apply_node_func="default"):
+        """Propagate messages using graph traversal by triggering pull() on nodes.
 
-        A convenient function for passing messages and updating
-        nodes according to the traverser. The traverser can be
-        any of the pre-defined traverser (e.g. 'topo'). User can also provide custom
-        traverser that generates the edges and nodes.
+        The traversal order is specified by the ``nodes_generator``. It generates
+        node frontiers, each of which is a list or a tensor of nodes. The nodes in
+        the same frontier will be triggered together, while nodes in different
+        frontiers will be triggered in the order they are generated.
 
         Parameters
         ----------
-        traverser : str or generator of edges.
-          The traverser of the graph.
-        message_func : str or callable
-          The message function.
-        reduce_func : str or callable
-          The reduce function.
-        apply_node_func : str or callable
-          The update function.
-        kwargs : keyword arguments, optional
-            Arguments for pre-defined iterators.
+        nodes_generator : generator
+            The generator of node frontiers.
+        message_func : str or callable, optional
+            The message function.
+        reduce_func : str or callable, optional
+            The reduce function.
+        apply_node_func : str or callable, optional
+            The update function.
         """
-        if isinstance(traverser, str):
-            # TODO(minjie): Call pre-defined routine to unroll the computation.
-            raise RuntimeError('Not implemented.')
-        else:
-            # NOTE: the iteration can return multiple edges at each step.
-            for u, v in traverser:
-                self.send_and_recv((u, v),
-                        message_func, reduce_func, apply_node_func)
+        for node_frontier in nodes_generator:
+            self.pull(node_frontier,
+                    message_func, reduce_func, apply_node_func)
+
+    def prop_edges(self,
+                   edge_generator,
+                   message_func="default",
+                   reduce_func="default",
+                   apply_node_func="default"):
+        """Propagate messages using graph traversal by triggering send_and_recv() on edges.
+
+        The traversal order is specified by the ``edge_generator``. It
+        generates edge frontiers, each of which is a list or a tensor of edge
+        ids or end points. The edges in the same frontier will be triggered
+        together, while edges in different frontiers will be triggered in the
+        order they are generated.
+
+        Parameters
+        ----------
+        edge_generator : generator
+            The generator of edge frontiers.
+        message_func : str or callable, optional
+            The message function.
+        reduce_func : str or callable, optional
+            The reduce function.
+        apply_node_func : str or callable, optional
+            The update function.
+        """
+        for edge_frontier in edge_generator:
+            self.send_and_recv(edge_frontier,
+                    message_func, reduce_func, apply_node_func)
 
     def subgraph(self, nodes):
         """Generate the subgraph among the given nodes.
@@ -1337,7 +1356,7 @@ def merge(self, subgraphs, reduce_func='sum'):
                 self._edge_frame.num_rows,
                 reduce_func)
 
-    def adjacency_matrix(self, ctx=None):
+    def adjacency_matrix(self, ctx=F.cpu()):
         """Return the adjacency matrix representation of this graph.
 
         Parameters
@@ -1352,7 +1371,7 @@ def adjacency_matrix(self, ctx=None):
         """
         return self._graph.adjacency_matrix().get(ctx)
 
-    def incidence_matrix(self, oriented=False, ctx=None):
+    def incidence_matrix(self, oriented=False, ctx=F.cpu()):
         """Return the incidence matrix representation of this graph.
 
         Parameters

diff --git a/python/dgl/traversal.py b/python/dgl/traversal.py
@@ -0,0 +1,151 @@
+"""Module for graph traversal methods."""
+from __future__ import absolute_import
+
+from ._ffi.function import _init_api
+from . import backend as F
+from . import utils
+
+__all__ = ['bfs_nodes_generator', 'topological_nodes_generator',
+           'dfs_edges_generator', 'dfs_labeled_edges_generator',]
+
+def bfs_nodes_generator(graph, source, reversed=False):
+    """Node frontiers generator using breadth-first search.
+
+    Parameters
+    ----------
+    graph : DGLGraph
+        The graph object.
+    source : list, tensor of nodes
+        Source nodes.
+    reversed : bool, optional
+        If true, traverse following the in-edge direction.
+
+    Returns
+    -------
+    list of node frontiers
+        Each node frontier is a list or tensor of nodes.
+    """
+    ghandle = graph._graph._handle
+    source = utils.toindex(source).todgltensor()
+    ret = _CAPI_DGLBFSNodes(ghandle, source, reversed)
+    all_nodes = utils.toindex(ret(0)).tousertensor()
+    # TODO(minjie): how to support directly creating python list
+    sections = utils.toindex(ret(1)).tousertensor().tolist()
+    return F.split(all_nodes, sections, dim=0)
+
+def topological_nodes_generator(graph, reversed=False):
+    """Node frontiers generator using topological traversal.
+
+    Parameters
+    ----------
+    graph : DGLGraph
+        The graph object.
+    reversed : bool, optional
+        If true, traverse following the in-edge direction.
+
+    Returns
+    -------
+    list of node frontiers
+        Each node frontier is a list or tensor of nodes.
+    """
+    ghandle = graph._graph._handle
+    ret = _CAPI_DGLTopologicalNodes(ghandle, reversed)
+    all_nodes = utils.toindex(ret(0)).tousertensor()
+    # TODO(minjie): how to support directly creating python list
+    sections = utils.toindex(ret(1)).tousertensor().tolist()
+    return F.split(all_nodes, sections, dim=0)
+
+def dfs_edges_generator(graph, source, reversed=False):
+    """Edge frontiers generator using depth-first-search (DFS).
+
+    Multiple source nodes can be specified to start the DFS traversal. One
+    needs to make sure that each source node belongs to a different connected
+    component, so the frontiers can be easily merged. Otherwise, the behavior
+    is undefined.
+
+    Parameters
+    ----------
+    graph : DGLGraph
+        The graph object.
+    source : list, tensor of nodes
+        Source nodes.
+    reversed : bool, optional
+        If true, traverse following the in-edge direction.
+
+    Returns
+    -------
+    list of edge frontiers
+        Each edge frontier is a list or tensor of edges.
+    """
+    ghandle = graph._graph._handle
+    source = utils.toindex(source).todgltensor()
+    ret = _CAPI_DGLDFSEdges(ghandle, source, reversed)
+    all_edges = utils.toindex(ret(0)).tousertensor()
+    # TODO(minjie): how to support directly creating python list
+    sections = utils.toindex(ret(1)).tousertensor().tolist()
+    return F.split(all_edges, sections, dim=0)
+
+def dfs_labeled_edges_generator(
+        graph,
+        source,
+        reversed=False,
+        has_reverse_edge=False,
+        has_nontree_edge=False,
+        return_labels=True):
+    """Produce edges in a depth-first-search (DFS) labeled by type.
+
+    There are three labels: FORWARD(0), REVERSE(1), NONTREE(2)
+
+    A FORWARD edge is one in which `u` has been visited but `v` has not. A
+    REVERSE edge is one in which both `u` and `v` have been visited and the
+    edge is in the DFS tree. A NONTREE edge is one in which both `u` and `v`
+    have been visited but the edge is NOT in the DFS tree.
+
+    Multiple source nodes can be specified to start the DFS traversal. One
+    needs to make sure that each source node belongs to a different connected
+    component, so the frontiers can be easily merged. Otherwise, the behavior
+    is undefined.
+
+    Parameters
+    ----------
+    graph : DGLGraph
+        The graph object.
+    source : list, tensor of nodes
+        Source nodes.
+    reversed : bool, optional
+        If true, traverse following the in-edge direction.
+    has_reverse_edge : bool, optional
+        True to include reverse edges.
+    has_nontree_edge : bool, optional
+        True to include nontree edges.
+    return_labels : bool, optional
+        True to return the labels of each edge.
+
+    Returns
+    -------
+    list of edge frontiers
+        Each edge frontier is a list or tensor of edges.
+    list of list of int
+        Label of each edge, organized in the same way as the edge frontiers.
+    """
+    ghandle = graph._graph._handle
+    source = utils.toindex(source).todgltensor()
+    ret = _CAPI_DGLDFSLabeledEdges(
+            ghandle,
+            source,
+            reversed,
+            has_reverse_edge,
+            has_nontree_edge,
+            return_labels)
+    all_edges = utils.toindex(ret(0)).tousertensor()
+    # TODO(minjie): how to support directly creating python list
+    if return_labels:
+        all_labels = utils.toindex(ret(1)).tousertensor()
+        sections = utils.toindex(ret(2)).tousertensor().tolist()
+        return (F.split(all_edges, sections, dim=0),
+                F.split(all_labels, sections, dim=0))
+    else:
+        sections = utils.toindex(ret(1)).tousertensor().tolist()
+        return F.split(all_edges, sections, dim=0)
+
+_init_api("dgl.traversal")
diff --git a/src/c_api_common.cc b/src/c_api_common.cc
@@ -24,7 +24,7 @@ DLManagedTensor* CreateTmpDLManagedTensor(const TVMArgValue& arg) {
 
 PackedFunc ConvertNDArrayVectorToPackedFunc(const std::vector<NDArray>& vec) {
     auto body = [vec](TVMArgs args, TVMRetValue* rv) {
-        int which = args[0];
+        const size_t which = args[0];
         if (which >= vec.size()) {
             LOG(FATAL) << "invalid choice";
         } else {
-Original file line number
+Diff line change
@@ Expand Up / @@ -25,6 +25,7 @@ class GraphOp { @@
        * \return the line graph
        */
       static Graph LineGraph(const Graph* graph, bool backtracking);
       /*!
        * \brief Return a disjoint union of the input graphs.
        *
@@ Expand Down @@