Skip to content

Commit

Permalink
[API] Graph traversal (dmlc#103)
Browse files Browse the repository at this point in the history
* bfs, dfs and topological traversal

* dfs and test cases

* Conflicts:
	python/dgl/graph.py
	src/graph/graph.cc
	src/graph/graph_op.cc

* documentation

* requested changes

* Conflicts:
	Jenkinsfile
	examples/pytorch/gcn/gcn.py
	examples/pytorch/gcn/gcn_spmv.py
	python/dgl/graph.py
	python/dgl/graph_index.py
	src/graph/graph.cc
	src/graph/graph_op.cc

* Conflicts:
	Jenkinsfile
	python/dgl/graph_index.py

* fix lint errors

* fix lint errors

* fix lint errors

* fix test cases

* requested changes

* traversal interface

* [Bug] fixed a typo that caused syntax error (dmlc#120)

* WIP

* bfs nodes generator works

* topological traversal

* WIP: dfs_edges

* dfs edges

* dfs labeled edges

* utest for traversal

* fix lint

* fix utest

* code clean

* changes as requested
  • Loading branch information
GaiYu0 authored and jermainewang committed Nov 8, 2018
1 parent 79cecce commit 2319167
Show file tree
Hide file tree
Showing 15 changed files with 776 additions and 47 deletions.
4 changes: 1 addition & 3 deletions examples/pytorch/tree_lstm/tree_lstm.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,7 @@ def forward(self, graph, zero_initializer, h=None, c=None, iterator=None, train=
g.set_n_repr({'x' : x, 'h' : h, 'c' : c, 'h_tild' : h_tild, 'c_tild' : c_tild})
# TODO(minjie): potential bottleneck
if iterator is None:
for frontier in topological_traverse(g):
#print('frontier', frontier)
g.pull(frontier)
g.propagate('topo')
else:
for frontier in iterator:
g.pull(frontier)
Expand Down
39 changes: 39 additions & 0 deletions include/dgl/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

#include <vector>
#include <cstdint>
#include <utility>
#include <tuple>
#include "runtime/ndarray.h"

namespace dgl {
Expand All @@ -16,6 +18,7 @@ typedef uint64_t dgl_id_t;
typedef tvm::runtime::NDArray IdArray;
typedef tvm::runtime::NDArray DegreeArray;
typedef tvm::runtime::NDArray BoolArray;
typedef tvm::runtime::NDArray IntArray;

class Graph;
class GraphOp;
Expand Down Expand Up @@ -301,6 +304,42 @@ class Graph {
*/
Graph Reverse() const;

/*!
 * \brief Return the successor vector
 * \param vid The vertex id. It is used directly as an index into the
 *        adjacency list; this accessor performs no range check of its own.
 * \return a const reference to the successor ids recorded for \p vid
 */
const std::vector<dgl_id_t>& SuccVec(dgl_id_t vid) const {
  const auto& out_entry = adjlist_[vid];
  return out_entry.succ;
}

/*!
 * \brief Return the out edge id vector
 * \param vid The vertex id. It is used directly as an index into the
 *        adjacency list; this accessor performs no range check of its own.
 * \return a const reference to the out edge ids recorded for \p vid
 */
const std::vector<dgl_id_t>& OutEdgeVec(dgl_id_t vid) const {
  const auto& out_entry = adjlist_[vid];
  return out_entry.edge_id;
}

/*!
 * \brief Return the predecessor vector
 * \param vid The vertex id. It is used directly as an index into the
 *        reverse adjacency list; this accessor performs no range check of
 *        its own.
 * \return a const reference to the predecessor ids recorded for \p vid
 */
const std::vector<dgl_id_t>& PredVec(dgl_id_t vid) const {
  // The "succ" field of the reverse adjacency list is what this accessor
  // exposes as the predecessors of vid.
  const auto& in_entry = reverse_adjlist_[vid];
  return in_entry.succ;
}

/*!
 * \brief Return the in edge id vector
 * \param vid The vertex id. It is used directly as an index into the
 *        reverse adjacency list; this accessor performs no range check of
 *        its own.
 * \return a const reference to the in edge ids recorded for \p vid
 */
const std::vector<dgl_id_t>& InEdgeVec(dgl_id_t vid) const {
  const auto& in_entry = reverse_adjlist_[vid];
  return in_entry.edge_id;
}

protected:
friend class GraphOp;
/*! \brief Internal edge list type */
Expand Down
1 change: 1 addition & 0 deletions include/dgl/graph_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class GraphOp {
* \return the line graph
*/
static Graph LineGraph(const Graph* graph, bool backtracking);

/*!
* \brief Return a disjoint union of the input graphs.
*
Expand Down
1 change: 1 addition & 0 deletions python/dgl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@
from .batched_graph import *
from .graph import DGLGraph
from .subgraph import DGLSubGraph
from .traversal import *
from .udf import NodeBatch, EdgeBatch
87 changes: 53 additions & 34 deletions python/dgl/graph.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
"""Base graph class specialized for neural networks on graphs.
"""
"""Base graph class specialized for neural networks on graphs."""
from __future__ import absolute_import

import networkx as nx
Expand Down Expand Up @@ -336,7 +335,7 @@ def find_edges(self, eid):
tensor, tensor
The source and destination node IDs.
"""
eid = utils.toindex(u)
eid = utils.toindex(eid)
src, dst, _ = self._graph.find_edges(eid)
return src.tousertensor(), dst.tousertensor()

Expand Down Expand Up @@ -1207,40 +1206,60 @@ def update_all(self,
self.send(ALL, message_func)
self.recv(ALL, reduce_func, apply_node_func)

def propagate(self,
traverser='topo',
message_func="default",
reduce_func="default",
apply_node_func="default",
**kwargs):
"""Propagate messages and update nodes using graph traversal.
def prop_nodes(self,
nodes_generator,
message_func="default",
reduce_func="default",
apply_node_func="default"):
"""Propagate messages using graph traversal by triggering pull() on nodes.
A convenient function for passing messages and updating
nodes according to the traverser. The traverser can be
any of the pre-defined traverser (e.g. 'topo'). User can also provide custom
traverser that generates the edges and nodes.
The traversal order is specified by the ``nodes_generator``. It generates
node frontiers, which is a list or a tensor of nodes. The nodes in the
same frontier will be triggered together, while nodes in different frontiers
will be triggered according to the generating order.
Parameters
----------
traverser : str or generator of edges.
The traverser of the graph.
message_func : str or callable
The message function.
reduce_func : str or callable
The reduce function.
apply_node_func : str or callable
The update function.
kwargs : keyword arguments, optional
Arguments for pre-defined iterators.
node_generators : generator
The generator of node frontiers.
message_func : str or callable, optional
The message function.
reduce_func : str or callable, optional
The reduce function.
apply_node_func : str or callable, optional
The update function.
"""
if isinstance(traverser, str):
# TODO(minjie): Call pre-defined routine to unroll the computation.
raise RuntimeError('Not implemented.')
else:
# NOTE: the iteration can return multiple edges at each step.
for u, v in traverser:
self.send_and_recv((u, v),
message_func, reduce_func, apply_node_func)
for node_frontier in nodes_generator:
self.pull(node_frontier,
message_func, reduce_func, apply_node_func)

def prop_edges(self,
               edge_generator,
               message_func="default",
               reduce_func="default",
               apply_node_func="default"):
    """Propagate messages using graph traversal by triggering send_and_recv() on edges.

    The traversal order is specified by the ``edge_generator``. It
    generates edge frontiers, which is a list or a tensor of edge ids or
    end points. The edges in the same frontier will be triggered together,
    while edges in different frontiers will be triggered according to the
    generating order.

    Parameters
    ----------
    edge_generator : generator
        The generator of edge frontiers. Any iterable works, since it is
        only consumed by a ``for`` loop.
    message_func : str or callable, optional
        The message function.
    reduce_func : str or callable, optional
        The reduce function.
    apply_node_func : str or callable, optional
        The update function.
    """
    # One send_and_recv() call per frontier, in the order the generator
    # yields them; the three functions are forwarded unchanged.
    for edge_frontier in edge_generator:
        self.send_and_recv(edge_frontier,
                           message_func, reduce_func, apply_node_func)

def subgraph(self, nodes):
"""Generate the subgraph among the given nodes.
Expand Down Expand Up @@ -1337,7 +1356,7 @@ def merge(self, subgraphs, reduce_func='sum'):
self._edge_frame.num_rows,
reduce_func)

def adjacency_matrix(self, ctx=None):
def adjacency_matrix(self, ctx=F.cpu()):
"""Return the adjacency matrix representation of this graph.
Parameters
Expand All @@ -1352,7 +1371,7 @@ def adjacency_matrix(self, ctx=None):
"""
return self._graph.adjacency_matrix().get(ctx)

def incidence_matrix(self, oriented=False, ctx=None):
def incidence_matrix(self, oriented=False, ctx=F.cpu()):
"""Return the incidence matrix representation of this graph.
Parameters
Expand Down
151 changes: 151 additions & 0 deletions python/dgl/traversal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
"""Module for graph traversal methods."""
from __future__ import absolute_import

from ._ffi.function import _init_api
from . import backend as F
from . import utils

__all__ = ['bfs_nodes_generator', 'topological_nodes_generator',
'dfs_edges_generator', 'dfs_labeled_edges_generator',]

def bfs_nodes_generator(graph, source, reversed=False):
    """Generate node frontiers in breadth-first search (BFS) order.

    Parameters
    ----------
    graph : DGLGraph
        The graph object.
    source : list, tensor of nodes
        Source nodes.
    reversed : bool, optional
        If true, traverse following the in-edge direction.

    Returns
    -------
    list of node frontiers
        Each node frontier is a list, tensor of nodes.
    """
    handle = graph._graph._handle
    source_ids = utils.toindex(source).todgltensor()
    result = _CAPI_DGLBFSNodes(handle, source_ids, reversed)
    # Slot 0 is the flat array of node ids; slot 1 is handed to F.split as
    # the sections (presumably the size of each frontier).
    flat_nodes = utils.toindex(result(0)).tousertensor()
    # TODO(minjie): how to support directly creating python list
    frontier_sections = utils.toindex(result(1)).tousertensor().tolist()
    return F.split(flat_nodes, frontier_sections, dim=0)

def topological_nodes_generator(graph, reversed=False):
    """Generate node frontiers in topological traversal order.

    Parameters
    ----------
    graph : DGLGraph
        The graph object.
    reversed : bool, optional
        If true, traverse following the in-edge direction.

    Returns
    -------
    list of node frontiers
        Each node frontier is a list, tensor of nodes.
    """
    handle = graph._graph._handle
    result = _CAPI_DGLTopologicalNodes(handle, reversed)
    # Slot 0 is the flat array of node ids; slot 1 is handed to F.split as
    # the sections (presumably the size of each frontier).
    flat_nodes = utils.toindex(result(0)).tousertensor()
    # TODO(minjie): how to support directly creating python list
    frontier_sections = utils.toindex(result(1)).tousertensor().tolist()
    return F.split(flat_nodes, frontier_sections, dim=0)

def dfs_edges_generator(graph, source, reversed=False):
    """Generate edge frontiers in depth-first search (DFS) order.

    Multiple source nodes can be specified to start the DFS traversal. One
    needs to make sure that each source node belongs to a different
    connected component, so the frontiers can be easily merged. Otherwise,
    the behavior is undefined.

    Parameters
    ----------
    graph : DGLGraph
        The graph object.
    source : list, tensor of nodes
        Source nodes.
    reversed : bool, optional
        If true, traverse following the in-edge direction.

    Returns
    -------
    list of edge frontiers
        Each edge frontier is a list, tensor of edges.
    """
    handle = graph._graph._handle
    source_ids = utils.toindex(source).todgltensor()
    result = _CAPI_DGLDFSEdges(handle, source_ids, reversed)
    # Slot 0 is the flat array of edges; slot 1 is handed to F.split as the
    # sections (presumably the size of each frontier).
    flat_edges = utils.toindex(result(0)).tousertensor()
    # TODO(minjie): how to support directly creating python list
    frontier_sections = utils.toindex(result(1)).tousertensor().tolist()
    return F.split(flat_edges, frontier_sections, dim=0)

def dfs_labeled_edges_generator(
        graph,
        source,
        reversed=False,
        has_reverse_edge=False,
        has_nontree_edge=False,
        return_labels=True):
    """Produce edges in a depth-first-search (DFS) labeled by type.

    There are three labels: FORWARD(0), REVERSE(1), NONTREE(2)

    A FORWARD edge is one in which `u` has been visited but `v` has not. A
    REVERSE edge is one in which both `u` and `v` have been visited and the
    edge is in the DFS tree. A NONTREE edge is one in which both `u` and `v`
    have been visited but the edge is NOT in the DFS tree.

    Multiple source nodes can be specified to start the DFS traversal. One
    needs to make sure that each source node belongs to a different
    connected component, so the frontiers can be easily merged. Otherwise,
    the behavior is undefined.

    Parameters
    ----------
    graph : DGLGraph
        The graph object.
    source : list, tensor of nodes
        Source nodes.
    reversed : bool, optional
        If true, traverse following the in-edge direction.
    has_reverse_edge : bool, optional
        True to include reverse edges.
    has_nontree_edge : bool, optional
        True to include nontree edges.
    return_labels : bool, optional
        True to return the labels of each edge.

    Returns
    -------
    list of edge frontiers
        Each edge frontier is a list, tensor of edges.
    list of list of int
        Label of each edge, organized in the same way as the edge frontiers.
        Only returned when ``return_labels`` is true; otherwise the edge
        frontiers are returned on their own.
    """
    handle = graph._graph._handle
    source_ids = utils.toindex(source).todgltensor()
    result = _CAPI_DGLDFSLabeledEdges(
        handle,
        source_ids,
        reversed,
        has_reverse_edge,
        has_nontree_edge,
        return_labels)
    flat_edges = utils.toindex(result(0)).tousertensor()
    # TODO(minjie): how to support directly creating python list
    if not return_labels:
        # Without labels the sections array sits in slot 1.
        frontier_sections = utils.toindex(result(1)).tousertensor().tolist()
        return F.split(flat_edges, frontier_sections, dim=0)
    # With labels, slot 1 carries the labels and the sections move to slot 2.
    flat_labels = utils.toindex(result(1)).tousertensor()
    frontier_sections = utils.toindex(result(2)).tousertensor().tolist()
    return (F.split(flat_edges, frontier_sections, dim=0),
            F.split(flat_labels, frontier_sections, dim=0))

_init_api("dgl.traversal")
2 changes: 1 addition & 1 deletion src/c_api_common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ DLManagedTensor* CreateTmpDLManagedTensor(const TVMArgValue& arg) {

PackedFunc ConvertNDArrayVectorToPackedFunc(const std::vector<NDArray>& vec) {
auto body = [vec](TVMArgs args, TVMRetValue* rv) {
int which = args[0];
const size_t which = args[0];
if (which >= vec.size()) {
LOG(FATAL) << "invalid choice";
} else {
Expand Down
Loading

0 comments on commit 2319167

Please sign in to comment.