[Doc] update doc string (dmlc#426)

mysqlsc · Mar 2, 2019 · a88f351 · a88f351
1 parent 3cc32a9
commit a88f351
Show file tree

Hide file tree

Showing 3 changed files with 70 additions and 42 deletions.
diff --git a/docs/source/api/python/index.rst b/docs/source/api/python/index.rst
@@ -15,3 +15,4 @@ API Reference
    data
    transform
    nn
+   subgraph
diff --git a/docs/source/api/python/subgraph.rst b/docs/source/api/python/subgraph.rst
@@ -1,13 +1,14 @@
 .. _apigraph:
 
 DGLSubGraph -- Class for subgraph data structure
-=========================================
+================================================
 
-.. currentmodule:: dgl
+.. currentmodule:: dgl.subgraph
 .. autoclass:: DGLSubGraph
 
- Mapping between subgraph and parent graph
- -------------------------------------
+Mapping between subgraph and parent graph
+-----------------------------------------
+.. autosummary::
     :toctree: ../../generated/
 
     DGLSubGraph.parent_nid
@@ -16,6 +17,7 @@ DGLSubGraph -- Class for subgraph data structure
 
 Synchronize features between subgraph and parent graph
 ------------------------------------------------------
+.. autosummary::
     :toctree: ../../generated/
 
     DGLSubGraph.copy_from_parent

diff --git a/python/dgl/contrib/sampling/sampler.py b/python/dgl/contrib/sampling/sampler.py
@@ -192,52 +192,77 @@ def NeighborSampler(g, batch_size, expand_factor, num_hops=1,
                     shuffle=False, num_workers=1, prefetch=False, add_self_loop=False):
     '''Create a sampler that samples neighborhood.
 
-    This creates a NodeFlow loader that samples subgraphs from the input graph
-    with neighbor sampling. This sampling method is implemented in C and can perform
-    sampling very efficiently.
+    It returns a generator of :class:`~dgl.NodeFlow`. This can be viewed as
+    an analogy of *mini-batch training* on graph data -- the given graph represents
+    the whole dataset and the returned generator produces mini-batches (in the form
+    of :class:`~dgl.NodeFlow` objects).
     
-    A NodeFlow grows from a seed vertex. It contains sampled neighbors
-    of the seed vertex as well as the edges that connect neighbor nodes with
-    seed nodes. When the number of hops is k (>1), the neighbors are sampled
-    from the k-hop neighborhood. In this case, the sampled edges are the ones
-    that connect the source nodes and the sampled neighbor nodes of the source
-    nodes.
+    A NodeFlow grows from sampled nodes. It first samples a set of nodes from the given
+    ``seed_nodes`` (or all the nodes if not given), then samples their neighbors
+    and extracts the subgraph. If the number of hops is :math:`k(>1)`, the process is repeated
+    recursively, with the neighbor nodes just sampled becoming the new seed nodes.
+    The result is a graph we define as :class:`~dgl.NodeFlow` that contains :math:`k+1`
+    layers. The last layer is the initial seed nodes. The sampled neighbor nodes in
+    layer :math:`i+1` are in layer :math:`i`. All the edges are from nodes
+    in layer :math:`i` to layer :math:`i+1`.
 
-    The NodeFlow loader returns a list of NodeFlows. The size of the NodeFlow list
-    is the number of workers.
+    TODO(minjie): give a figure here.
+    
+    As an analogy to mini-batch training, the ``batch_size`` here is equal to the number
+    of the initial seed nodes (number of nodes in the last layer).
+    The number of nodeflow objects (the number of batches) is calculated by
+    ``len(seed_nodes) // batch_size`` (if ``seed_nodes`` is None, the seed nodes
+    are all the nodes in the graph).
 
     Parameters
     ----------
-    g: the DGLGraph where we sample NodeFlows.
-    batch_size: The number of NodeFlows in a batch.
-    expand_factor: the number of neighbors sampled from the neighbor list
-        of a vertex. The value of this parameter can be
-        an integer: indicates the number of neighbors sampled from a neighbor list.
-        a floating-point: indicates the ratio of the sampled neighbors in a neighbor list.
-        string: indicates some common ways of calculating the number of sampled neighbors,
-        e.g., 'sqrt(deg)'.
-    num_hops: The size of the neighborhood where we sample vertices.
-    neighbor_type: indicates the neighbors on different types of edges.
-        "in" means the neighbors on the in-edges, "out" means the neighbors on
-        the out-edges and "both" means neighbors on both types of edges.
-    node_prob: the probability that a neighbor node is sampled.
-        1D Tensor. None means uniform sampling. Otherwise, the number of elements
-        should be the same as the number of vertices in the graph.
-    seed_nodes: a list of nodes where we sample NodeFlows from.
-        If it's None, the seed vertices are all vertices in the graph.
-    shuffle: indicates the sampled NodeFlows are shuffled.
-    num_workers: the number of worker threads that sample NodeFlows in parallel.
-    prefetch : bool, default False
-        Whether to prefetch the samples in the next batch.
-    add_self_loop : bool, default False
-        Whether to add self loop to the sampled NodeFlow.
-        If True, the edge IDs of the self loop edges are -1.
+    g : DGLGraph
+        The DGLGraph where we sample NodeFlows.
+    batch_size : int
+        The batch size (i.e., the number of nodes in the last layer).
+    expand_factor : int, float, str
+        The number of neighbors sampled from the neighbor list of a vertex.
+        The value of this parameter can be:
+
+        * int: indicates the number of neighbors sampled from a neighbor list.
+        * float: indicates the ratio of the sampled neighbors in a neighbor list.
+        * str: indicates some common ways of calculating the number of sampled neighbors,
+          e.g., ``sqrt(deg)``.
+
+    num_hops : int, optional
+        The number of hops to sample (i.e., the NodeFlow contains ``num_hops + 1`` layers).
+        Default: 1
+    neighbor_type : str, optional
+        Indicates the neighbors on different types of edges.
+
+        * "in": the neighbors on the in-edges.
+        * "out": the neighbors on the out-edges.
+        * "both": the neighbors on both types of edges.
+
+        Default: "in"
+    node_prob : Tensor, optional
+        A 1D tensor for the probability that a neighbor node is sampled.
+        None means uniform sampling. Otherwise, the number of elements
+        should be equal to the number of vertices in the graph.
+        Default: None
+    seed_nodes : Tensor, optional
+        A 1D tensor of nodes where we sample NodeFlows from.
+        If None, the seed vertices are all the vertices in the graph.
+        Default: None
+    shuffle : bool, optional
+        Indicates whether the sampled NodeFlows are shuffled. Default: False
+    num_workers : int, optional
+        The number of worker threads that sample NodeFlows in parallel. Default: 1
+    prefetch : bool, optional
+        If true, prefetch the samples in the next batch. Default: False
+    add_self_loop : bool, optional
+        If true, add self loop to the sampled NodeFlow.
+        The edge IDs of the self loop edges are -1. Default: False
 
     Returns
     -------
-    A NodeFlow iterator
-        The iterator returns a list of batched NodeFlows and a dictionary of additional
-        information about the NodeFlows.
+    generator
+        The generator of NodeFlows.
     '''
     loader = NSSubgraphLoader(g, batch_size, expand_factor, num_hops, neighbor_type, node_prob,
                               seed_nodes, shuffle, num_workers, add_self_loop)
-Original file line number
+Diff line change
@@ Expand Up / @@ -15,3 +15,4 @@ API Reference @@
        data
        transform
        nn
+       subgraph