[Doc] Update the docstring of distributed APIs. (dmlc#2025)
* add doc.

* update DistGraph.

* add DistTensor.

* update DistEmbedding.

* add partition.py

* add sampling.

* fix.

* add graph partition book and create a base class.

* fix test.

* add rst.

* update doc rst.

* update.

* fix.

* fix docs

* update distributed tensor and embeddings.

* add checks.

* update DistGraph.

* update initialization.

* fix graph partition book.

* update graph partition book.

* update partition.

* update partition.

* fix.

* add example code.

* update DistGraph

* Update python/dgl/distributed/dist_context.py

Co-authored-by: Quan (Andy) Gan <[email protected]>

* Update python/dgl/distributed/dist_context.py

Co-authored-by: Quan (Andy) Gan <[email protected]>

* Update python/dgl/distributed/dist_dataloader.py

Co-authored-by: Quan (Andy) Gan <[email protected]>

* Update python/dgl/distributed/dist_dataloader.py

Co-authored-by: Quan (Andy) Gan <[email protected]>

* Update python/dgl/distributed/dist_dataloader.py

Co-authored-by: Quan (Andy) Gan <[email protected]>

* update initialize.

* update dataloader.

* update distgraph.

* update DistGraph.

* update DistTensor.

* update.

* more updates.

* fix lint.

* add num_nodes and num_edges

Co-authored-by: Chao Ma <[email protected]>
Co-authored-by: Quan (Andy) Gan <[email protected]>
Co-authored-by: xiang song(charlie.song) <[email protected]>
4 people authored Aug 18, 2020
1 parent 3d65484 commit 75ffc31
Showing 14 changed files with 899 additions and 467 deletions.
92 changes: 92 additions & 0 deletions docs/source/api/python/dgl.distributed.rst
@@ -0,0 +1,92 @@
.. _api-distributed:

dgl.distributed
=================================

.. automodule:: dgl.distributed

Initialization
---------------

.. autosummary::
:toctree: ../../generated/

initialize

Distributed Graph
-----------------

.. autoclass:: DistGraph
:members: ndata, edata, idtype, device, ntypes, etypes, number_of_nodes, number_of_edges, node_attr_schemes, edge_attr_schemes, rank, find_edges, get_partition_book, barrier, local_partition

Distributed Tensor
------------------

.. autoclass:: DistTensor
:members: part_policy, shape, dtype, name

Distributed Embedding
---------------------

.. autoclass:: DistEmbedding

.. autoclass:: SparseAdagrad
:members: step

Distributed workload split
--------------------------

.. autosummary::
:toctree: ../../generated/

node_split
edge_split

Distributed Sampling
--------------------

Distributed DataLoader
``````````````````````

.. currentmodule:: dgl.distributed.dist_dataloader

.. autoclass:: DistDataLoader

Distributed Neighbor Sampling
`````````````````````````````

.. currentmodule:: dgl.distributed.graph_services

.. autosummary::
:toctree: ../../generated/

sample_neighbors
find_edges
in_subgraph

Partition
---------

Graph partition book
````````````````````

.. currentmodule:: dgl.distributed.graph_partition_book

.. autoclass:: GraphPartitionBook
:members: shared_memory, num_partitions, metadata, nid2partid, eid2partid, partid2nids, partid2eids, nid2localnid, eid2localeid, partid

.. autoclass:: PartitionPolicy
:members: policy_str, part_id, partition_book, to_local, to_partid, get_part_size, get_size

Split and Load Graphs
`````````````````````

.. currentmodule:: dgl.distributed.partition

.. autosummary::
:toctree: ../../generated/

load_partition
load_partition_book
partition_graph
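
The partitioning utilities listed under "Split and Load Graphs" are normally run offline, before any servers or trainers start. The following is a minimal sketch under assumed values: the toy graph, the graph name ``mygraph``, the output directory ``out_dir`` and the choice of 4 partitions are placeholders for illustration, not values taken from this commit.

import dgl
import dgl.distributed
import torch as th

# Build a small random graph purely for illustration.
src = th.randint(0, 1000, (5000,))
dst = th.randint(0, 1000, (5000,))
g = dgl.graph((src, dst))
g.ndata['feat'] = th.randn(g.number_of_nodes(), 16)

# Split the graph into 4 parts; the partition config is written to out_dir/mygraph.json.
dgl.distributed.partition_graph(g, graph_name='mygraph', num_parts=4, out_path='out_dir')

# A server process later loads one partition together with its node/edge features and
# the graph partition book (see load_partition above for the exact return values).
part_data = dgl.distributed.load_partition('out_dir/mygraph.json', 0)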

1 change: 1 addition & 0 deletions docs/source/api/python/index.rst
@@ -12,3 +12,4 @@ API Reference
dgl.function
sampling
dgl.dataloading
dgl.distributed
1 change: 1 addition & 0 deletions docs/source/index.rst
@@ -110,6 +110,7 @@ Getting Started
api/python/dgl.function
api/python/sampling
api/python/dgl.dataloading
api/python/dgl.distributed

.. toctree::
:maxdepth: 3
19 changes: 16 additions & 3 deletions python/dgl/distributed/__init__.py
@@ -1,10 +1,23 @@
"""DGL distributed."""
"""DGL distributed module contains classes and functions to support
distributed graph neural network training and inference in a cluster of
machines.
This includes a few submodules:
* distributed data structures including distributed graph, distributed tensor
and distributed embeddings.
* distributed sampling.
* distributed workload split at runtime.
* graph partition.
"""
import os
import sys

from .dist_graph import DistGraphServer, DistGraph, DistTensor, node_split, edge_split
from .dist_graph import DistGraphServer, DistGraph, node_split, edge_split
from .dist_tensor import DistTensor
from .partition import partition_graph, load_partition, load_partition_book
from .graph_partition_book import GraphPartitionBook, RangePartitionBook, PartitionPolicy
from .graph_partition_book import GraphPartitionBook, PartitionPolicy
from .sparse_emb import SparseAdagrad, DistEmbedding

from .rpc import *
32 changes: 28 additions & 4 deletions python/dgl/distributed/dist_context.py
@@ -44,7 +44,15 @@ def _init_rpc(ip_config, num_servers, max_queue_size, net_type, role, num_thread
def initialize(ip_config, num_servers=1, num_workers=0,
max_queue_size=MAX_QUEUE_SIZE, net_type='socket',
num_worker_threads=1):
"""Init rpc service
"""Initialize DGL's distributed module
This function initializes DGL's distributed module. It acts differently in server
or client modes. In the server mode, it runs the server code and never returns.
In the client mode, it builds connections with servers for communication and
creates worker processes for distributed sampling. `num_workers` specifies
the number of sampling worker processes per trainer process.
Users also have to provide the number of server processes on each machine in order
to connect to all the server processes in the cluster of machines correctly.
Parameters
----------
@@ -57,12 +65,21 @@ def initialize(ip_config, num_servers=1, num_workers=0,
for distributed sampling.
max_queue_size : int
Maximal size (bytes) of the client queue buffer (roughly 20 GB by default).
Note that 20 GB is only an upper bound; DGL uses zero-copy messaging and
does not allocate 20 GB of memory up front.
net_type : str
Networking type. Current options are: 'socket'.
net_type : str, optional
Networking type. Currently the only valid option is ``'socket'``.
Default: ``'socket'``
num_worker_threads: int
The number of threads in a worker process.
Note
----
Users have to invoke this API before calling any other DGL distributed API or any
framework-specific distributed API. For example, when used with PyTorch, users have
to invoke this function before PyTorch's `torch.distributed.init_process_group`.
"""
if os.environ.get('DGL_ROLE', 'client') == 'server':
from .dist_graph import DistGraphServer
@@ -138,7 +155,14 @@ def is_initialized():
return INITIALIZED

def exit_client():
"""Register exit callback.
"""Trainer exits
This function is called automatically when a Python process exits. Normally,
the training script does not need to invoke this function at the end.
In the case that the training script needs to initialize the distributed module
multiple times (so far, this is needed in the unit tests), the training script
needs to call `exit_client` before calling `initialize` again.
"""
# Only the client with rank 0 sends the shutdown request to the servers.
finalize_worker() # finalizing workers should happen before the barrier, and is non-blocking
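
The call order described above can be condensed into a short trainer-side sketch. The ``ip_config.txt`` path, the worker count and the ``gloo`` backend are illustrative assumptions, not values prescribed by this commit.

import dgl
import dgl.distributed
import torch as th

# Initialize DGL's distributed module first: connect to the servers listed in
# ip_config.txt and spawn 4 sampling worker processes for this trainer.
dgl.distributed.initialize('ip_config.txt', num_servers=1, num_workers=4)

# Only afterwards set up the framework-specific process group (PyTorch here, assuming
# the launcher sets the usual MASTER_ADDR/MASTER_PORT environment variables).
th.distributed.init_process_group(backend='gloo')

# ... create DistGraph, dataloaders and models, then run training ...

# exit_client() runs automatically at process exit; the script only needs to call
# dgl.distributed.exit_client() explicitly if it will call initialize() again,
# e.g. in unit tests.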
73 changes: 51 additions & 22 deletions python/dgl/distributed/dist_dataloader.py
@@ -58,31 +58,60 @@ def enable_mp_debug():
DATALOADER_ID = 0

class DistDataLoader:
"""DGL customized multiprocessing dataloader, which is designed for using with DistGraph."""
"""DGL customized multiprocessing dataloader.
DistDataLoader provides a similar interface to Pytorch's DataLoader to generate mini-batches
with multiprocessing. It utilizes the worker processes created by
:func:`dgl.distributed.initialize` to parallelize sampling.
Parameters
----------
dataset: a tensor
A tensor of node IDs or edge IDs.
batch_size: int
The number of samples per batch to load.
shuffle: bool, optional
Set to ``True`` to have the data reshuffled at every epoch (default: ``False``).
collate_fn: callable, optional
The function is typically used to sample neighbors of the nodes in a batch
or the endpoint nodes of the edges in a batch.
drop_last: bool, optional
Set to ``True`` to drop the last incomplete batch, if the dataset size is not
divisible by the batch size. If ``False`` and the size of dataset is not divisible
by the batch size, then the last batch will be smaller. (default: ``False``)
queue_size: int, optional
Size of the multiprocessing queue.
Examples
--------
>>> g = dgl.distributed.DistGraph('graph-name')
>>> def sample(seeds):
... seeds = th.LongTensor(np.asarray(seeds))
... frontier = dgl.distributed.sample_neighbors(g, seeds, 10)
... return dgl.to_block(frontier, seeds)
>>> dataloader = dgl.distributed.DistDataLoader(dataset=nodes, batch_size=1000,
...                                             collate_fn=sample, shuffle=True)
>>> for block in dataloader:
... feat = g.ndata['features'][block.srcdata[dgl.NID]]
... labels = g.ndata['labels'][block.dstdata[dgl.NID]]
... pred = model(block, feat)
Note
----
When performing DGL's distributed sampling with multiprocessing, users have to use this
class instead of PyTorch's DataLoader because DGL's RPC requires that all processes
establish connections with the servers before invoking any DGL distributed API.
Therefore, this dataloader uses the worker processes created by :func:`dgl.distributed.initialize`.
Note
----
This dataloader does not guarantee the iteration order. For example,
if dataset = [1, 2, 3, 4], batch_size = 2 and shuffle = False, the order of [1, 2]
and [3, 4] is not guaranteed.
"""

def __init__(self, dataset, batch_size, shuffle=False, collate_fn=None, drop_last=False,
queue_size=None):
"""
This class will utilize the worker processes created by the dgl.distributed.initialize function.
Note that the iteration order is not guaranteed with this class. For example,
if dataset = [1, 2, 3, 4], batch_size = 2 and shuffle = False, the order of [1, 2]
and [3, 4] is not guaranteed.
dataset (Dataset): dataset from which to load the data.
batch_size (int, optional): how many samples per batch to load
(default: ``1``).
shuffle (bool, optional): set to ``True`` to have the data reshuffled
at every epoch (default: ``False``).
collate_fn (callable, optional): merges a list of samples to form a
mini-batch of Tensor(s). Used when using batched loading from a
map-style dataset.
drop_last (bool, optional): set to ``True`` to drop the last incomplete batch,
if the dataset size is not divisible by the batch size. If ``False`` and
the size of dataset is not divisible by the batch size, then the last batch
will be smaller. (default: ``False``)
queue_size (int, optional): Size of multiprocessing queue
"""
self.pool, self.num_workers = get_sampler_pool()
if queue_size is None:
queue_size = self.num_workers * 4 if self.num_workers > 0 else 4
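
Putting the pieces of this commit together, a trainer typically pairs ``node_split`` with ``DistDataLoader`` roughly as follows. This is a hedged sketch: the graph name, the ``train_mask``/``feat``/``label`` field names, the fan-out of 10 and the model call are placeholders rather than part of the changed code.

import numpy as np
import torch as th
import dgl
import dgl.distributed

dgl.distributed.initialize('ip_config.txt', num_workers=4)
th.distributed.init_process_group(backend='gloo')

g = dgl.distributed.DistGraph('mygraph')

# node_split picks the subset of training nodes this trainer is responsible for,
# based on a boolean mask and the graph partition book.
train_nids = dgl.distributed.node_split(g.ndata['train_mask'], g.get_partition_book())

def sample(seeds):
    # Sample up to 10 neighbors per seed on the servers and convert to a block.
    seeds = th.LongTensor(np.asarray(seeds))
    frontier = dgl.distributed.sample_neighbors(g, seeds, 10)
    return dgl.to_block(frontier, seeds)

dataloader = dgl.distributed.DistDataLoader(dataset=train_nids, batch_size=1000,
                                            collate_fn=sample, shuffle=True)

for block in dataloader:
    feat = g.ndata['feat'][block.srcdata[dgl.NID]]
    label = g.ndata['label'][block.dstdata[dgl.NID]]
    # pred = model(block, feat); compute the loss and step the optimizer here.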
