Commit fa5efab

comments and case where not all sparse (pytorch#3370)
ssnl authored and soumith committed Nov 1, 2017
1 parent 7c0b16c commit fa5efab
Showing 2 changed files with 62 additions and 12 deletions.
52 changes: 48 additions & 4 deletions torch/_utils.py
@@ -111,6 +111,12 @@ def _flatten_dense_tensors(tensors):
Since inputs are dense, the resulting tensor will be a concatenated 1D
buffer. Element-wise operation on this buffer will be equivalent to
operating individually.
Arguments:
tensors (Iterable[Tensor]): dense tensors to flatten.
Returns:
A contiguous 1D buffer containing input tensors.
"""
if len(tensors) == 1:
return tensors[0].contiguous().view(-1)
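
Note: a minimal usage sketch of the helper documented above; the tensors and shapes are illustrative, not part of the commit.

    import torch
    from torch._utils import _flatten_dense_tensors

    # two dense tensors of the same type but different shapes
    a = torch.ones(2, 3)   # 6 elements
    b = torch.zeros(4)     # 4 elements

    flat = _flatten_dense_tensors([a, b])
    print(flat.size())     # torch.Size([10]): one contiguous 1D buffer
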
@@ -127,6 +133,13 @@ def _flatten_dense_tensors(tensors):
def _flatten_sparse_tensors(tensors):
"""Flatten sparse tensors into two contiguous 1D buffers, one of indices and
one of values. Assume tensors are of same sparse type.
Arguments:
tensors (Iterable[Tensor]): sparse tensors to flatten.
Returns:
A tuple of two contiguous 1D buffers, one containing input tensors'
indices and the other containing the values.
"""
flat_indices = _flatten_dense_tensors([t._indices() for t in tensors])
flat_values = _flatten_dense_tensors([t._values() for t in tensors])
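
Note: a sketch of the sparse counterpart; torch.sparse_coo_tensor and torch.tensor are constructors from current PyTorch and are assumptions relative to the API as of this commit.

    import torch
    from torch._utils import _flatten_sparse_tensors

    s1 = torch.sparse_coo_tensor(torch.tensor([[0, 1]]), torch.tensor([1.0, 2.0]), (4,)).coalesce()
    s2 = torch.sparse_coo_tensor(torch.tensor([[2]]), torch.tensor([3.0]), (4,)).coalesce()

    flat_indices, flat_values = _flatten_sparse_tensors([s1, s2])
    # two contiguous 1D buffers: all indices concatenated, and all values concatenated
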
@@ -136,6 +149,15 @@ def _flatten_sparse_tensors(tensors):
def _unflatten_dense_tensors(flat, tensors):
"""View a flat buffer using the sizes of tensors. Assume that tensors are of
same dense type, and that flat is given by _flatten_dense_tensors.
Arguments:
flat (Tensor): flattened dense tensors to unflatten.
tensors (Iterable[Tensor]): dense tensors whose sizes will be used to
unflatten flat.
Returns:
Unflattened dense tensors with sizes same as tensors and values from
flat.
"""
outputs = []
offset = 0
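
Note: a round-trip sketch showing how _unflatten_dense_tensors pairs with _flatten_dense_tensors; names and shapes are illustrative.

    import torch
    from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors

    grads = [torch.randn(2, 3), torch.randn(5)]
    flat = _flatten_dense_tensors(grads)

    # operate once on the single flat buffer instead of per-tensor
    flat.mul_(0.5)

    # tensors shaped like the originals, with data taken from the flat buffer
    unflat = _unflatten_dense_tensors(flat, grads)
    assert all(u.size() == g.size() for u, g in zip(unflat, grads))
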
@@ -150,6 +172,16 @@ def _unflatten_sparse_tensors(flat, tensors):
"""View flat buffer (containing indices and values) using the sizes of
tensors. Assume that tensors are of same sparse type, and that flat is given
by _flatten_sparse_tensors.
Arguments:
flat (tuple(Tensor, Tensor)): flattened indices and values of sparse
tensors to unflatten.
tensors (Iterable[Tensor]): sparse tensors whose sizes will be used to
unflatten flat.
Returns:
Unflattened sparse tensors with sizes same as tensors and values from
flat.
"""
flat_indices, flat_values = flat
indices = _unflatten_dense_tensors(flat_indices, [t._indices() for t in tensors])
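
Note: the sparse round trip, sketched under the same torch.sparse_coo_tensor assumption as above.

    import torch
    from torch._utils import _flatten_sparse_tensors, _unflatten_sparse_tensors

    s1 = torch.sparse_coo_tensor(torch.tensor([[0, 1]]), torch.tensor([1.0, 2.0]), (4,)).coalesce()
    s2 = torch.sparse_coo_tensor(torch.tensor([[2]]), torch.tensor([3.0]), (4,)).coalesce()

    flat = _flatten_sparse_tensors([s1, s2])          # (flat_indices, flat_values)
    restored = _unflatten_sparse_tensors(flat, [s1, s2])
    # restored[i] is a sparse tensor with the same size as the i-th input
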
@@ -162,8 +194,18 @@ def _unflatten_sparse_tensors(flat, tensors):

def _reorder_tensors_as(tensors, ordered_tensors):
"""Assume that tensors are of same order as ordered_tensors within their
- types, e.g. from _take_tensors. Reorder them to be of same order as
+ types, e.g., from _take_tensors. Reorder them to be of same order as
ordered_tensors.
Arguments:
tensors (Iterable[Tensor]): tensors to be reordered. They should be of
the same order as ordered_tensors within their own types.
ordered_tensors (Iterable[Tensor]): tensors whose order will be the
reference.
Returns:
Ordered tuple of tensors with contents from tensors and order of
ordered_tensors.
"""
type_dict = defaultdict(list)
for tensor in tensors:
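
Note: a sketch of the reordering helper; tensors that were grouped by type (e.g. by _take_tensors) are put back into the reference order. The tensors are illustrative.

    import torch
    from torch._utils import _reorder_tensors_as

    float_a, float_b = torch.ones(2), torch.ones(3)
    long_c = torch.ones(4).long()

    ordered = [float_a, long_c, float_b]    # reference order
    grouped = [float_a, float_b, long_c]    # grouped by type, order kept within each type

    back = _reorder_tensors_as(grouped, ordered)
    # back == (float_a, long_c, float_b): the reference interleaving is restored
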
@@ -173,14 +215,16 @@ def _reorder_tensors_as(tensors, ordered_tensors):


def _take_tensors(tensors, size_limit):
"""Group tensors into chunks. This generator yields a chunk at each call,
"""Group tensors into chunks. This generator yields a chunk at each time,
each containing tensors of same type up to certain byte limit in total size.
The yielded tensors are only ordered as the original sequence within its
types.
Args:
tensors (Sequence): A sequence of tensors to be separated into chunks.
size_limit (int): The limit of each chunk in bytes.
Yields:
Blocks of tensors of same type and within size_limit. The yielded
tensors are only ordered as the original sequence within its types.
"""
buf_dict = defaultdict(lambda: [[], 0])
for tensor in tensors:
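
Note: a sketch of the chunking generator; the 4 KB limit and tensors are illustrative.

    import torch
    from torch._utils import _take_tensors

    tensors = [torch.ones(1024), torch.ones(1024), torch.ones(4).long()]
    # the float tensors are 4 KB each, so each lands in its own chunk; the long
    # tensor is a different type and is grouped separately
    for chunk in _take_tensors(tensors, 4 * 1024):
        print([t.numel() for t in chunk])   # [1024], then [1024], then [4]
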
22 changes: 14 additions & 8 deletions torch/cuda/comm.py
@@ -131,20 +131,26 @@ def reduce_add_coalesced(inputs, destination=None, buffer_size=10485760):
         A tuple of tensors containing an elementwise sum of each group of
         inputs, placed on the ``destination`` device.
     """
-    dense_tensors = []  # shape (num_tensors, num_gpus)
+    dense_tensors = [[] for _ in inputs]  # shape (num_gpus, num_tensors)
     output = []
+    ref_order = []
+    # process sparse ones first since they may have different sizes on different gpus
     for tensor_at_gpus in zip(*inputs):
-        if tensor_at_gpus[0].is_sparse:
+        if all(t.is_sparse for t in tensor_at_gpus):
             result = reduce_add(tensor_at_gpus, destination)
             output.append(result)
+            ref_order.append(tensor_at_gpus[0])
         else:
-            dense_tensors.append(tensor_at_gpus)
-    itrs = [_take_tensors(tensors, buffer_size) for tensors in zip(*dense_tensors)]
+            for coll, t in zip(dense_tensors, tensor_at_gpus):
+                coll.append(t.to_dense() if t.is_sparse else t)
+            ref_order.append(dense_tensors[0][-1])
+    itrs = [_take_tensors(tensors, buffer_size) for tensors in dense_tensors]
+    # now the dense ones, which have consistent sizes
     for chunks in zip(*itrs):
-        tensors = [_flatten_dense_tensors(chunk) for chunk in chunks]
-        result = reduce_add(tensors, destination)
-        output.extend(_unflatten_dense_tensors(result, chunks[0]))
-    return tuple(_reorder_tensors_as(output, inputs[0]))
+        flat_tensors = [_flatten_dense_tensors(chunk) for chunk in chunks]
+        flat_result = reduce_add(flat_tensors, destination)
+        output.extend(_unflatten_dense_tensors(flat_result, chunks[0]))
+    return tuple(_reorder_tensors_as(output, ref_order))
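
Note: a sketch of the behavior this change enables. reduce_add_coalesced can now handle a tensor that is sparse on some GPUs but dense on others by densifying that group. The example assumes at least two visible CUDA devices and uses current-PyTorch constructors (the device= keyword and .to_sparse()), which are assumptions relative to the API at the time of this commit.

    import torch
    import torch.cuda.comm as comm

    # per-GPU lists of tensors; the second entry is sparse on cuda:0 only,
    # i.e. the "not all sparse" case this commit handles
    grads_gpu0 = [torch.ones(3, device='cuda:0'),
                  torch.ones(2, 2, device='cuda:0').to_sparse()]
    grads_gpu1 = [torch.ones(3, device='cuda:1'),
                  torch.ones(2, 2, device='cuda:1')]

    summed = comm.reduce_add_coalesced([grads_gpu0, grads_gpu1], destination=0)
    # summed[i] is the elementwise sum of the i-th tensor across GPUs, on device 0
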


def scatter(tensor, devices, chunk_sizes=None, dim=0, streams=None):