From 7cca5007877caecf46212cd7b97262d7e19dc2af Mon Sep 17 00:00:00 2001 From: Mufei Li Date: Wed, 5 Dec 2018 05:51:46 +0800 Subject: [PATCH] [Doc] Computing with DGLGraph (#212) * Docs for graph computing * Docs for graph computing * Fix * WIP * update the docstring --- docs/source/api/python/udf.rst | 10 + python/dgl/graph.py | 826 ++++++++++++++++++++++++++++----- 2 files changed, 714 insertions(+), 122 deletions(-) diff --git a/docs/source/api/python/udf.rst b/docs/source/api/python/udf.rst index 0c85db912640..c668ab6869cf 100644 --- a/docs/source/api/python/udf.rst +++ b/docs/source/api/python/udf.rst @@ -4,6 +4,16 @@ User-defined function related data structures ================================================== .. currentmodule:: dgl.udf +.. automodule:: dgl.udf + +There are two types of user-defined functions in DGL: + +* **Node UDF** of signature ``NodeBatch -> dict``. The argument represents + a batch of nodes. The returned dictionary should have ``str`` type key and ``tensor`` + type values. +* **Edge UDF** of signature ``EdgeBatch -> dict``. The argument represents + a batch of edges. The returned dictionary should have ``str`` type key and ``tensor`` + type values. EdgeBatch --------- diff --git a/python/dgl/graph.py b/python/dgl/graph.py index fd5ad35ffec0..6d4e18e2837e 100644 --- a/python/dgl/graph.py +++ b/python/dgl/graph.py @@ -81,7 +81,11 @@ class DGLGraph(object): >>> G.add_edges([2, 6, 8], 5) # three edges: 2->5, 6->5, 8->5 - or multiple edges using tensor type (demo in pytorch syntax). + or multiple edges using tensor type + + .. note:: Here we use pytorch syntax for demo. The general idea applies + to other frameworks with minor syntax change (e.g. replace + ``torch.tensor`` with ``mxnet.ndarray``). 
>>> import torch as th >>> G.add_edges(th.tensor([3, 4, 5]), 1) # three edges: 3->1, 4->1, 5->1 @@ -132,6 +136,39 @@ class DGLGraph(object): >>> G.edata {'y' : tensor([[2., 2., 2., 2.], [1., 1., 1., 1.]])} + + **Message Passing:** + + One common operation for updating node features is message passing, + where the source nodes send messages through edges to the destinations. + With :class:`DGLGraph`, we can do this with :func:`send` and :func:`recv`. + + In the example below, the source nodes add 1 to their node features as + the messages and send the messages to the destinations. + + >>> # Define the function for sending messages. + >>> def send_source(edges): return {'m': edges.src['x'] + 1} + >>> # Set the function defined to be the default message function. + >>> G.register_message_func(send_source) + >>> # Send messages through all edges. + >>> G.send(G.edges()) + + Just like you need to go to your mailbox for retrieving mails, the destination + nodes also need to receive the messages and potentially update their features. + + >>> # Define a function for summing messages received and replacing the original feature. + >>> def simple_reduce(nodes): return {'x': nodes.mailbox['m'].sum(1)} + >>> # Set the function defined to be the default message reduce function. + >>> G.register_reduce_func(simple_reduce) + >>> # All existing edges have node 2 as the destination. + >>> # Receive the messages for node 2 and update its feature. + >>> G.recv(v=2) + >>> G.ndata + {'x': tensor([[1., 1., 1., 1., 1.], + [0., 0., 0., 0., 0.], + [3., 3., 3., 3., 3.]])} # 3 = (1 + 1) + (0 + 1) + + For more examples about message passing, please read our tutorials. """ def __init__(self, graph_data=None, @@ -188,7 +225,12 @@ def add_nodes(self, num, data=None): >>> g.number_of_nodes() 5 - Adding new nodes with features (using PyTorch as example): + Adding new nodes with features: + + .. note:: Here we use pytorch syntax for demo. 
The general idea applies + to other frameworks with minor syntax change (e.g. replace + ``torch.tensor`` with ``mxnet.ndarray``). + >>> import torch as th >>> g.add_nodes(2, {'x': th.ones(2, 4)}) # default zero initializer >>> g.ndata['x'] @@ -235,6 +277,11 @@ def add_edge(self, u, v, data=None): >>> G.add_edge(0, 1) Adding new edge with features + + .. note:: Here we use pytorch syntax for demo. The general idea applies + to other frameworks with minor syntax change (e.g. replace + ``torch.tensor`` with ``mxnet.ndarray``). + >>> import torch as th >>> G.add_edge(0, 2, {'x': th.ones(1, 4)}) >>> G.edges() @@ -284,6 +331,11 @@ def add_edges(self, u, v, data=None): >>> G.add_edges([0, 2], [1, 3]) # add edges (0, 1) and (2, 3) Adding new edges with features + + .. note:: Here we use pytorch syntax for demo. The general idea applies + to other frameworks with minor syntax change (e.g. replace + ``torch.tensor`` with ``mxnet.ndarray``). + >>> import torch as th >>> G.add_edges([1, 3], [2, 0], {'x': th.ones(2, 4)}) # (1, 2), (3, 0) >>> G.edata['x'] @@ -1356,56 +1408,133 @@ def pop_e_repr(self, key): def register_message_func(self, func): """Register global message function. + Once registered, ``func`` will be used as the default + message function in message passing operations, including + :func:`send`, :func:`send_and_recv`, :func:`pull`, + :func:`push`, :func:`update_all`. + Parameters ---------- func : callable - Message function on the edge. + Message function on the edge. The function should be + an :mod:`Edge UDF <dgl.udf>`. + + See Also + -------- + send + send_and_recv + pull + push + update_all """ self._message_func = func def register_reduce_func(self, func): """Register global message reduce function. + Once registered, ``func`` will be used as the default + message reduce function in message passing operations, including + :func:`recv`, :func:`send_and_recv`, :func:`push`, :func:`pull`, + :func:`update_all`.
+ Parameters ---------- - func : str or callable - Reduce function on incoming edges. + func : callable + Reduce function on the node. The function should be + a :mod:`Node UDF <dgl.udf>`. + + See Also + -------- + recv + send_and_recv + push + pull + update_all """ self._reduce_func = func def register_apply_node_func(self, func): """Register global node apply function. + Once registered, ``func`` will be used as the default apply + node function. Related operations include :func:`apply_nodes`, + :func:`recv`, :func:`send_and_recv`, :func:`push`, :func:`pull`, + :func:`update_all`. + Parameters ---------- func : callable - Apply function on the node. + Apply function on the nodes. The function should be + a :mod:`Node UDF <dgl.udf>`. + + See Also + -------- + apply_nodes + register_apply_edge_func """ self._apply_node_func = func def register_apply_edge_func(self, func): """Register global edge apply function. + Once registered, ``func`` will be used as the default apply + edge function in :func:`apply_edges`. + Parameters ---------- - edge_func : callable - Apply function on the edge. + func : callable + Apply function on the edge. The function should be + an :mod:`Edge UDF <dgl.udf>`. + + See Also + -------- + apply_edges + register_apply_node_func """ self._apply_edge_func = func def apply_nodes(self, func="default", v=ALL, inplace=False): - """Apply the function on the node features. + """Apply the function on the nodes to update their features. - Applying a None function will be ignored. + If None is provided for ``func``, nothing will happen. Parameters ---------- - func : callable, optional - The UDF applied on the node features. + func : callable or None, optional + Apply function on the nodes. The function should be + a :mod:`Node UDF <dgl.udf>`. v : int, iterable of int, tensor, optional - The node id(s). - inplace: bool, optional + The node id(s) on which to apply ``func``. The default + value is all the nodes.
+ inplace : bool, optional If True, update will be done in place, but autograd will break. + + Examples + -------- + + .. note:: Here we use pytorch syntax for demo. The general idea applies + to other frameworks with minor syntax change (e.g. replace + ``torch.tensor`` with ``mxnet.ndarray``). + + >>> import dgl + >>> import torch as th + + >>> g = dgl.DGLGraph() + >>> g.add_nodes(3) + >>> g.ndata['x'] = th.ones(3, 1) + + >>> # Increment the node feature by 1. + >>> def increment_feature(nodes): return {'x': nodes.data['x'] + 1} + >>> g.apply_nodes(func=increment_feature, v=[0, 2]) # Apply func to nodes 0, 2 + >>> g.ndata + {'x': tensor([[2.], + [1.], + [2.]])} + + See Also + -------- + register_apply_node_func + apply_edges """ if func == "default": func = self._apply_node_func @@ -1421,22 +1550,51 @@ def apply_nodes(self, func="default", v=ALL, inplace=False): Runtime.run(prog) def apply_edges(self, func="default", edges=ALL, inplace=False): - """Apply the function on the edge features. + """Apply the function on the edges to update their features. + + If None is provided for ``func``, nothing will happen. Parameters ---------- func : callable, optional - The UDF applied on the edge features. - edges : edges, optional - Edges can be a pair of endpoint nodes (u, v), or a - tensor of edge ids. The default value is all the edges. + Apply function on the edge. The function should be + an :mod:`Edge UDF <dgl.udf>`. + edges : valid edges type, optional + Edges on which to apply ``func``. See :func:`send` for valid + edges type. Default is all the edges. inplace: bool, optional If True, update will be done in place, but autograd will break. Notes ----- - On multigraphs, if u and v are specified, then all the edges - between u and v will be updated. + On multigraphs, if :math:`u` and :math:`v` are specified, then all the edges + between :math:`u` and :math:`v` will be updated. + + Examples + -------- + + .. note:: Here we use pytorch syntax for demo.
The general idea applies + to other frameworks with minor syntax change (e.g. replace + ``torch.tensor`` with ``mxnet.ndarray``). + + >>> import dgl + >>> import torch as th + + >>> g = dgl.DGLGraph() + >>> g.add_nodes(3) + >>> g.add_edges([0, 1], [1, 2]) # 0 -> 1, 1 -> 2 + >>> g.edata['y'] = th.ones(2, 1) + + >>> # Doubles the edge feature. + >>> def double_feature(edges): return {'y': edges.data['y'] * 2} + >>> g.apply_edges(func=double_feature, edges=0) # Apply func to the first edge. + >>> g.edata + {'y': tensor([[2.], # 2 * 1 + [1.]])} + + See Also + -------- + apply_nodes """ if func == "default": func = self._apply_edge_func @@ -1464,21 +1622,41 @@ def apply_edges(self, func="default", edges=ALL, inplace=False): inplace=inplace) Runtime.run(prog) - def send(self, edges, message_func="default"): + def send(self, edges=ALL, message_func="default"): """Send messages along the given edges. + ``edges`` can be any of the following types: + + * ``int`` : Specify one edge using its edge id. + * ``pair of int`` : Specify one edge using its endpoints. + * ``int iterable`` / ``tensor`` : Specify multiple edges using their edge ids. + * ``pair of int iterable`` / ``pair of tensors`` : + Specify multiple edges using their endpoints. + + The UDF returns messages on the edges, which can later be fetched in + the destination node's ``mailbox``. Receiving will consume the messages. + See :func:`recv` for example. + + If multiple ``send`` are triggered on the same edge without ``recv``, messages + generated by the later ``send`` will overwrite previous messages. + Parameters ---------- - edges : edges, optional - Edges can be a pair of endpoint nodes (u, v), or a - tensor of edge ids. + edges : valid edges type, optional + Edges on which to apply ``message_func``. Default is sending along all + the edges. message_func : callable - The message function. + Message function on the edges. The function should be + an :mod:`Edge UDF <dgl.udf>`.
Notes ----- - On multigraphs, if u and v are specified, then the messages will be sent - along all edges between u and v. + On multigraphs, if :math:`u` and :math:`v` are specified, then the messages will be sent + along all edges between :math:`u` and :math:`v`. + + Examples + -------- + See the *message passing* example in :class:`DGLGraph` or :func:`recv`. """ if message_func == "default": message_func = self._message_func @@ -1505,26 +1683,84 @@ def send(self, edges, message_func="default"): self._msg_graph.add_edges(u, v) def recv(self, - v, + v=ALL, reduce_func="default", apply_node_func="default", inplace=False): - """Receive and reduce in-coming messages and update representation on node v. + """Receive and reduce incoming messages and update the features of node(s) :math:`v`. + + Optionally, apply a function to update the node features after receive. - TODO(minjie): document on zero-in-degree case - TODO(minjie): document on how returned new features are merged with the old features - TODO(minjie): document on how many times UDFs will be called + * ``reduce_func`` will be skipped for nodes with no incoming message. + * If all ``v`` have no incoming message, this will downgrade to an :func:`apply_nodes`. + * If some ``v`` have no incoming message, their new feature value will be calculated + by the column initializer (see :func:`set_n_initializer`). The feature shapes and + dtypes will be inferred. + + The node features will be updated by the result of the ``reduce_func``. + + Messages are consumed once received. + + The provided UDF may be called multiple times, so it is recommended to provide + a function with no side effects. Parameters ---------- - v : node, container or tensor - The node to be updated. - reduce_func : callable - The reduce function. - apply_node_func : callable, optional - The update function. + v : node, container or tensor, optional + The node to be updated. Default is receiving for all the nodes.
+ reduce_func : callable, optional + Reduce function on the node. The function should be + a :mod:`Node UDF <dgl.udf>`. + apply_node_func : callable, optional + Apply function on the nodes. The function should be + a :mod:`Node UDF <dgl.udf>`. inplace: bool, optional - If True, update will be done in place, but autograd will break. + If True, update will be done in place, but autograd will break. + + Examples + -------- + Create a graph object for demo. + + .. note:: Here we use pytorch syntax for demo. The general idea applies + to other frameworks with minor syntax change (e.g. replace + ``torch.tensor`` with ``mxnet.ndarray``). + + >>> import dgl + >>> import torch as th + >>> g = dgl.DGLGraph() + >>> g.add_nodes(3) + >>> g.ndata['x'] = th.tensor([[1.], [2.], [3.]]) + >>> g.add_edges([0, 1], [1, 2]) + + >>> # Define the function for sending node features as messages. + >>> def send_source(edges): return {'m': edges.src['x']} + >>> # Set the function defined to be the default message function. + >>> g.register_message_func(send_source) + + >>> # Sum the messages received and use this to replace the original node feature. + >>> def simple_reduce(nodes): return {'x': nodes.mailbox['m'].sum(1)} + >>> # Set the function defined to be the default message reduce function. + >>> g.register_reduce_func(simple_reduce) + + Send and receive messages. Note that although node :math:`0` has no incoming edges, + its feature gets changed from :math:`1` to :math:`0` as it is also included in + ``g.nodes()``. + + >>> g.send(g.edges()) + >>> g.recv(g.nodes()) + >>> g.ndata['x'] + tensor([[0.], + [1.], + [2.]]) + + Once messages are received, one will need another call of :func:`send` before + another call of :func:`recv`. Otherwise, nothing will happen.
+ + >>> g.recv(g.nodes()) + >>> g.ndata['x'] + tensor([[0.], + [1.], + [2.]]) """ if reduce_func == "default": reduce_func = self._reduce_func @@ -1536,8 +1772,7 @@ def recv(self, # no message has ever been sent return - v_is_all = is_all(v) - if v_is_all: + if is_all(v): v = F.arange(0, self.number_of_nodes()) elif isinstance(v, int): v = [v] @@ -1563,29 +1798,76 @@ def send_and_recv(self, reduce_func="default", apply_node_func="default", inplace=False): - """Send messages along edges and receive them on the targets. + """Send messages along edges and let destinations receive them. + + Optionally, apply a function to update the node features after receive. + + This is a convenient combination for performing + ``send(self, edges, message_func)`` and + ``recv(self, dst, reduce_func, apply_node_func)``, where ``dst`` + are the destinations of the ``edges``. Parameters ---------- - edges : edges - Edges can be a pair of endpoint nodes (u, v), or a - tensor of edge ids. The default value is all the edges. + edges : valid edges type + Edges on which to apply ``message_func``. See :func:`send` for valid + edges type. message_func : callable, optional - The message function. Registered function will be used if not - specified. + Message function on the edges. The function should be + an :mod:`Edge UDF <dgl.udf>`. reduce_func : callable, optional - The reduce function. Registered function will be used if not - specified. + Reduce function on the node. The function should be + a :mod:`Node UDF <dgl.udf>`. apply_node_func : callable, optional - The update function. Registered function will be used if not - specified. + Apply function on the nodes. The function should be + a :mod:`Node UDF <dgl.udf>`. inplace: bool, optional If True, update will be done in place, but autograd will break. + Examples + -------- + + .. note:: Here we use pytorch syntax for demo. The general idea applies + to other frameworks with minor syntax change (e.g. replace + ``torch.tensor`` with ``mxnet.ndarray``).
+ + >>> import dgl + >>> import torch as th + >>> g = dgl.DGLGraph() + >>> g.add_nodes(3) + >>> g.ndata['x'] = th.tensor([[1.], [2.], [3.]]) + >>> g.add_edges([0, 1], [1, 2]) + + >>> # Define the function for sending node features as messages. + >>> def send_source(edges): return {'m': edges.src['x']} + >>> # Set the function defined to be the default message function. + >>> g.register_message_func(send_source) + + >>> # Sum the messages received and use this to replace the original node feature. + >>> def simple_reduce(nodes): return {'x': nodes.mailbox['m'].sum(1)} + >>> # Set the function defined to be the default message reduce function. + >>> g.register_reduce_func(simple_reduce) + + Send and receive messages. + + >>> g.send_and_recv(g.edges()) + >>> g.ndata['x'] + tensor([[1.], + [1.], + [2.]]) + + Note that the feature of node :math:`0` remains the same as it has no + incoming edges. + Notes ----- On multigraphs, if u and v are specified, then the messages will be sent and received along all edges between u and v. + + See Also + -------- + send + recv """ if message_func == "default": message_func = self._message_func @@ -1626,20 +1908,81 @@ def pull(self, reduce_func="default", apply_node_func="default", inplace=False): - """Pull messages from the node's predecessors and then update it. + """Pull messages from the node(s)' predecessors and then update their features. + + Optionally, apply a function to update the node features after receive. + + * `reduce_func` will be skipped for nodes with no incoming message. + * If all ``v`` have no incoming message, this will downgrade to an :func:`apply_nodes`. + * If some ``v`` have no incoming message, their new feature value will be calculated + by the column initializer (see :func:`set_n_initializer`). The feature shapes and + dtypes will be inferred. Parameters ---------- - v : node, container or tensor - The node to be updated. - message_func : callable - The message function. 
- reduce_func : callable - The reduce function. + v : int, iterable of int, or tensor + The node(s) to be updated. + message_func : callable, optional + Message function on the edges. The function should be + an :mod:`Edge UDF <dgl.udf>`. + reduce_func : callable, optional + Reduce function on the node. The function should be + a :mod:`Node UDF <dgl.udf>`. apply_node_func : callable, optional - The update function. + Apply function on the nodes. The function should be + a :mod:`Node UDF <dgl.udf>`. inplace: bool, optional - If True, update will be done in place, but autograd will break. + If True, update will be done in place, but autograd will break. + + Examples + -------- + .. note:: Here we use pytorch syntax for demo. The general idea applies + to other frameworks with minor syntax change (e.g. replace + ``torch.tensor`` with ``mxnet.ndarray``). + + Create a graph for demo. + + >>> import dgl + >>> import torch as th + >>> g = dgl.DGLGraph() + >>> g.add_nodes(3) + >>> g.ndata['x'] = th.tensor([[0.], [1.], [2.]]) + + Use the built-in message function :func:`~dgl.function.copy_src` for copying + node features as the message. + + >>> m_func = dgl.function.copy_src('x', 'm') + >>> g.register_message_func(m_func) + + Use the built-in message reducing function :func:`~dgl.function.sum`, which + sums the messages received and replaces the old node features with it. + + >>> m_reduce_func = dgl.function.sum('m', 'x') + >>> g.register_reduce_func(m_reduce_func) + + As no edges exist, nothing happens. + + >>> g.pull(g.nodes()) + >>> g.ndata['x'] + tensor([[0.], + [1.], + [2.]]) + + Add edges ``0 -> 1, 1 -> 2``. Pull messages for the node :math:`2`. + + >>> g.add_edges([0, 1], [1, 2]) + >>> g.pull(2) + >>> g.ndata['x'] + tensor([[0.], + [1.], + [1.]]) + + The feature of node :math:`2` changes but the feature of node :math:`1` + remains the same as we did not :func:`pull` (and reduce) messages for it.
+ + See Also + -------- + push """ if message_func == "default": message_func = self._message_func @@ -1669,20 +2012,75 @@ def push(self, reduce_func="default", apply_node_func="default", inplace=False): - """Send message from the node to its successors and update them. + """Send message from the node(s) to their successors and update them. + + Optionally, apply a function to update the node features after receive. Parameters ---------- - u : node, container or tensor - The node that sends out messages. - message_func : callable - The message function. - reduce_func : callable - The reduce function. - apply_node_func : callable - The update function. + u : int, iterable of int, or tensor + The node(s) to push messages out. + message_func : callable, optional + Message function on the edges. The function should be + an :mod:`Edge UDF <dgl.udf>`. + reduce_func : callable, optional + Reduce function on the node. The function should be + a :mod:`Node UDF <dgl.udf>`. + apply_node_func : callable, optional + Apply function on the nodes. The function should be + a :mod:`Node UDF <dgl.udf>`. inplace: bool, optional - If True, update will be done in place, but autograd will break. + If True, update will be done in place, but autograd will break. + + Examples + -------- + .. note:: Here we use pytorch syntax for demo. The general idea applies + to other frameworks with minor syntax change (e.g. replace + ``torch.tensor`` with ``mxnet.ndarray``). + + Create a graph for demo. + + >>> import dgl + >>> import torch as th + >>> g = dgl.DGLGraph() + >>> g.add_nodes(3) + >>> g.ndata['x'] = th.tensor([[1.], [2.], [3.]]) + + Use the built-in message function :func:`~dgl.function.copy_src` for copying + node features as the message. + + >>> m_func = dgl.function.copy_src('x', 'm') + >>> g.register_message_func(m_func) + + Use the built-in message reducing function :func:`~dgl.function.sum`, which + sums the messages received and replaces the old node features with it.
+ + >>> m_reduce_func = dgl.function.sum('m', 'x') + >>> g.register_reduce_func(m_reduce_func) + + As no edges exist, nothing happens. + + >>> g.push(g.nodes()) + >>> g.ndata['x'] + tensor([[1.], + [2.], + [3.]]) + + Add edges ``0 -> 1, 1 -> 2``. Send messages from the node :math:`1` and update. + + >>> g.add_edges([0, 1], [1, 2]) + >>> g.push(1) + >>> g.ndata['x'] + tensor([[1.], + [2.], + [2.]]) + + The feature of node :math:`2` changes but the feature of node :math:`1` + remains the same as we did not :func:`push` for node :math:`0`. + + See Also + -------- + pull """ if message_func == "default": message_func = self._message_func @@ -1710,16 +2108,30 @@ def update_all(self, message_func="default", reduce_func="default", apply_node_func="default"): - """Send messages through all the edges and update all nodes. + """Send messages through all edges and update all nodes. + + Optionally, apply a function to update the node features after receive. + + This is a convenient combination for performing + ``send(self, self.edges(), message_func)`` and + ``recv(self, self.nodes(), reduce_func, apply_node_func)``. Parameters ---------- - message_func : callable - The message function. - reduce_func : callable - The reduce function. + message_func : callable, optional + Message function on the edges. The function should be + an :mod:`Edge UDF <dgl.udf>`. + reduce_func : callable, optional + Reduce function on the node. The function should be + a :mod:`Node UDF <dgl.udf>`. apply_node_func : callable, optional - The update function. + Apply function on the nodes. The function should be + a :mod:`Node UDF <dgl.udf>`. + + See Also + -------- + send + recv """ if message_func == "default": message_func = self._message_func @@ -1742,7 +2154,8 @@ def prop_nodes(self, message_func="default", reduce_func="default", apply_node_func="default"): - """Propagate messages using graph traversal by triggering pull() on nodes. + """Propagate messages using graph traversal by triggering + :func:`pull()` on nodes.
The traversal order is specified by the ``nodes_generator``. It generates node frontiers, which is a list or a tensor of nodes. The nodes in the @@ -1751,14 +2164,64 @@ def prop_nodes(self, Parameters ---------- - node_generators : generator - The generator of node frontiers. + nodes_generator : iterable, each element is a list or a tensor of node ids + The generator of node frontiers. It specifies which nodes perform + :func:`pull` at each timestep. message_func : callable, optional - The message function. + Message function on the edges. The function should be + an :mod:`Edge UDF <dgl.udf>`. reduce_func : callable, optional - The reduce function. + Reduce function on the node. The function should be + a :mod:`Node UDF <dgl.udf>`. apply_node_func : callable, optional - The update function. + Apply function on the nodes. The function should be + a :mod:`Node UDF <dgl.udf>`. + + Examples + -------- + .. note:: Here we use pytorch syntax for demo. The general idea applies + to other frameworks with minor syntax change (e.g. replace + ``torch.tensor`` with ``mxnet.ndarray``). + + Create a graph for demo. + + >>> import dgl + >>> import torch as th + >>> g = dgl.DGLGraph() + >>> g.add_nodes(4) + >>> g.ndata['x'] = th.tensor([[1.], [2.], [3.], [4.]]) + >>> g.add_edges([0, 1, 1, 2], [1, 2, 3, 3]) + + Prepare message function and message reduce function for demo. + + >>> def send_source(edges): return {'m': edges.src['x']} + >>> g.register_message_func(send_source) + >>> def simple_reduce(nodes): return {'x': nodes.mailbox['m'].sum(1)} + >>> g.register_reduce_func(simple_reduce) + + First pull messages for nodes :math:`1, 2` with edges ``0 -> 1`` and + ``1 -> 2``; and then pull messages for node :math:`3` with edges + ``1 -> 3`` and ``2 -> 3``. + + >>> g.prop_nodes([[1, 2], [3]]) + >>> g.ndata['x'] + tensor([[1.], + [1.], + [2.], + [3.]]) + + In the first stage, we pull messages for nodes :math:`1, 2`. + The feature of node :math:`1` is replaced by that of node :math:`0`, i.e.
1. + The feature of node :math:`2` is replaced by that of node :math:`1`, i.e. 2. + Both replacements happen simultaneously. + + In the second stage, we pull messages for node :math:`3`. + The feature of node :math:`3` becomes the sum of node :math:`1`'s feature and + :math:`2`'s feature, i.e. 1 + 2 = 3. + + See Also + -------- + prop_edges """ for node_frontier in nodes_generator: self.pull(node_frontier, @@ -1769,24 +2232,79 @@ def prop_edges(self, message_func="default", reduce_func="default", apply_node_func="default"): - """Propagate messages using graph traversal by triggering send_and_recv() on edges. + """Propagate messages using graph traversal by triggering + :func:`send_and_recv()` on edges. + + The traversal order is specified by the ``edges_generator``. It generates + edge frontiers. The edge frontiers should be of *valid edges type*. + See :func:`send` for more details. - The traversal order is specified by the ``edges_generator``. It - generates edge frontiers, which is a list or a tensor of edge ids or - end points. The edges in the same frontier will be triggered together, - while edges in different frontiers will be triggered according to the - generating order. + Edges in the same frontier will be triggered together, while edges in + different frontiers will be triggered according to the generating order. Parameters ---------- edges_generator : generator The generator of edge frontiers. message_func : callable, optional - The message function. + Message function on the edges. The function should be + an :mod:`Edge UDF <dgl.udf>`. reduce_func : callable, optional - The reduce function. + Reduce function on the node. The function should be + a :mod:`Node UDF <dgl.udf>`. apply_node_func : callable, optional - The update function. + Apply function on the nodes. The function should be + a :mod:`Node UDF <dgl.udf>`. + + Examples + -------- + .. note:: Here we use pytorch syntax for demo. The general idea applies + to other frameworks with minor syntax change (e.g.
replace + ``torch.tensor`` with ``mxnet.ndarray``). + + Create a graph for demo. + + >>> import dgl + >>> import torch as th + >>> g = dgl.DGLGraph() + >>> g.add_nodes(4) + >>> g.ndata['x'] = th.tensor([[1.], [2.], [3.], [4.]]) + >>> g.add_edges([0, 1, 1, 2], [1, 2, 3, 3]) + + Prepare message function and message reduce function for demo. + + >>> def send_source(edges): return {'m': edges.src['x']} + >>> g.register_message_func(send_source) + >>> def simple_reduce(nodes): return {'x': nodes.mailbox['m'].sum(1)} + >>> g.register_reduce_func(simple_reduce) + + First propagate messages for edges ``0 -> 1``, ``1 -> 3`` and then + propagate messages for edges ``1 -> 2``, ``2 -> 3``. + + >>> g.prop_edges([([0, 1], [1, 3]), ([1, 2], [2, 3])]) + >>> g.ndata['x'] + tensor([[1.], + [1.], + [1.], + [3.]]) + + In the first stage, the following happens simultaneously. + + - The feature of node :math:`1` is replaced by that of + node :math:`0`, i.e. 1. + - The feature of node :math:`3` is replaced by that of + node :math:`1`, i.e. 2. + + In the second stage, the following happens simultaneously. + + - The feature of node :math:`2` is replaced by that of + node :math:`1`, i.e. 1. + - The feature of node :math:`3` is replaced by that of + node :math:`2`, i.e. 3. + + See Also + -------- + prop_nodes """ for edge_frontier in edges_generator: self.send_and_recv(edge_frontier, @@ -1843,7 +2361,9 @@ def subgraphs(self, nodes): nodes in the list. Equivalent to + `` [self.subgraph(nodes_list) for nodes_list in nodes] + `` Parameters ---------- @@ -1943,27 +2463,29 @@ def incidence_matrix(self, type, ctx=F.cpu()): or not. There are three types of an incidence matrix :math:`I`: - - * "in": - - - :math:`I[v, e] = 1` if e is the in-edge of v (or v is the dst node of e); - - :math:`I[v, e] = 0` otherwise. - - * "out": - - - :math:`I[v, e] = 1` if e is the out-edge of v (or v is the src node of e); - - :math:`I[v, e] = 0` otherwise. 
- - * "both": - - - :math:`I[v, e] = 1` if e is the in-edge of v; - - :math:`I[v, e] = -1` if e is the out-edge of v; - - :math:`I[v, e] = 0` otherwise (including self-loop). + + * ``in``: + + - :math:`I[v, e] = 1` if :math:`e` is the in-edge of :math:`v` + (or :math:`v` is the dst node of :math:`e`); + - :math:`I[v, e] = 0` otherwise. + + * ``out``: + + - :math:`I[v, e] = 1` if :math:`e` is the out-edge of :math:`v` + (or :math:`v` is the src node of :math:`e`); + - :math:`I[v, e] = 0` otherwise. + + * ``both``: + + - :math:`I[v, e] = 1` if :math:`e` is the in-edge of :math:`v`; + - :math:`I[v, e] = -1` if :math:`e` is the out-edge of :math:`v`; + - :math:`I[v, e] = 0` otherwise (including self-loop). Parameters ---------- type : str - Can be either "in", "out" or "both" + Can be either ``in``, ``out`` or ``both`` ctx : context, optional (default=cpu) The context of returned incidence matrix. @@ -2000,16 +2522,45 @@ def filter_nodes(self, predicate, nodes=ALL): Parameters ---------- predicate : callable - The predicate should take in a NodeBatch object, and return a - boolean tensor with N elements indicating which node satisfy - the predicate. - nodes : container or tensor - The nodes to filter on + A function of signature ``func(nodes) -> tensor``. + ``nodes`` are :class:`NodeBatch` objects as in :mod:`~dgl.udf`. + The ``tensor`` returned should be a 1-D boolean tensor with + each element indicating whether the corresponding node in + the batch satisfies the predicate. + nodes : int, iterable or tensor of ints + The nodes to filter on. Default value is all the nodes. Returns ------- tensor - The filtered nodes + The filtered nodes. + + Examples + -------- + .. note:: Here we use pytorch syntax for demo. The general idea applies + to other frameworks with minor syntax change (e.g. replace + ``torch.tensor`` with ``mxnet.ndarray``). + + Construct a graph object for demo. 
+ + >>> import dgl + >>> import torch as th + >>> g = dgl.DGLGraph() + >>> g.add_nodes(3) + >>> g.ndata['x'] = th.tensor([[1.], [-1.], [1.]]) + + Define a function for filtering nodes with feature :math:`1`. + + >>> def has_feature_one(nodes): return (nodes.data['x'] == 1).squeeze(1) + + Filter the nodes with feature :math:`1`. + + >>> g.filter_nodes(has_feature_one) + tensor([0, 2]) + + See Also + -------- + filter_edges """ if is_all(nodes): v = utils.toindex(slice(0, self.number_of_nodes())) @@ -2032,17 +2583,48 @@ def filter_edges(self, predicate, edges=ALL): Parameters ---------- predicate : callable - The predicate should take in an EdgeBatch object, and return a - boolean tensor with E elements indicating which edge satisfy - the predicate. - edges : edges - Edges can be a pair of endpoint nodes (u, v), or a - tensor of edge ids. The default value is all the edges. + A function of signature ``func(edges) -> tensor``. + ``edges`` are :class:`EdgeBatch` objects as in :mod:`~dgl.udf`. + The ``tensor`` returned should be a 1-D boolean tensor with + each element indicating whether the corresponding edge in + the batch satisfies the predicate. + edges : valid edges type + Edges on which to apply ``predicate``. See :func:`send` for valid + edges type. Default value is all the edges. Returns ------- tensor - The filtered edges + The filtered edges represented by their ids. + + Examples + -------- + + .. note:: Here we use pytorch syntax for demo. The general idea applies + to other frameworks with minor syntax change (e.g. replace + ``torch.tensor`` with ``mxnet.ndarray``). + Construct a graph object for demo. + + >>> import dgl + >>> import torch as th + >>> g = dgl.DGLGraph() + >>> g.add_nodes(3) + >>> g.ndata['x'] = th.tensor([[1.], [-1.], [1.]]) + >>> g.add_edges([0, 1, 2], [2, 2, 1]) + + Define a function for filtering edges whose destinations have + node feature :math:`1`.
+ + >>> def has_dst_one(edges): return (edges.dst['x'] == 1).squeeze(1) + + Filter the edges whose destination nodes have feature :math:`1`. + + >>> g.filter_edges(has_dst_one) + tensor([0, 1]) + + See Also + -------- + filter_nodes """ if is_all(edges): eid = ALL