Remove call to .contiguous() for local_shard_t.
The call to contiguous was probably left over from a previous
implementation and is no longer needed.

Had to adjust atol for one of the tests to accommodate this.

Differential Revision: [D36797942](https://our.internmc.facebook.com/intern/diff/D36797942/)

Pull Request resolved: pytorch#78598

Approved by: https://github.com/kumpera
pritamdamania87 authored and pytorchmergebot committed Jun 1, 2022
1 parent 497ae27 commit 5aa2ed1
Showing 2 changed files with 2 additions and 2 deletions.
@@ -145,7 +145,7 @@ def _shard_parameter(module, spec):
)

# Test backward gradient calculation.
-self.assertEqual(sharded_weight_fc1.grad, local_grad_narrowed_fc1, atol=1e-4, rtol=1e-6)
+self.assertEqual(sharded_weight_fc1.grad, local_grad_narrowed_fc1, atol=1e-3, rtol=1e-6)
self.assertEqual(sharded_weight_fc2.grad, local_grad_narrowed_fc2, atol=1e-4, rtol=1e-6)
self.assertEqual(bias_grad_fc1, local_bias_grad_fc1, atol=1e-4, rtol=1e-6)
self.assertEqual(bias_grad_fc2, local_bias_grad_fc2, atol=1e-4, rtol=1e-6)
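For reference, these assertions pass when |actual - expected| <= atol + rtol * |expected| (the rule used by torch.allclose). A minimal standalone sketch, with made-up values, of what the loosened atol permits:

    import torch

    expected = torch.tensor([1.0000])
    actual = torch.tensor([1.0005])  # differs by 5e-4, e.g. from a changed reduction order

    # Fails under the old tolerance: 5e-4 > 1e-4 + 1e-6 * |expected|
    print(torch.allclose(actual, expected, atol=1e-4, rtol=1e-6))  # False
    # Passes under the new tolerance: 5e-4 <= 1e-3 + 1e-6 * |expected|
    print(torch.allclose(actual, expected, atol=1e-3, rtol=1e-6))  # True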
@@ -100,7 +100,7 @@ def sharded_linear(types, args, kwargs, pg):
bias = args[2]

local_shard = weight.local_tensor()
-local_shard_t = local_shard.t().contiguous()
+local_shard_t = local_shard.t()
sharding_dim = weight._sharding_spec.dim
world_size = dist.get_world_size(pg)
rank = dist.get_rank(pg)
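A minimal standalone sketch (not the library code itself; shapes are made up) of why the extra copy can be dropped: Tensor.t() returns a strided view, and matrix-multiply ops accept non-contiguous inputs, so materializing a contiguous copy only adds memory traffic.

    import torch

    local_shard = torch.randn(16, 32)   # stand-in for weight.local_tensor()
    local_shard_t = local_shard.t()     # transposed view, no data copy

    print(local_shard_t.is_contiguous())  # False -- just different strides
    inp = torch.randn(5, 32)
    out = inp.matmul(local_shard_t)       # works without .contiguous()
    print(out.shape)                      # torch.Size([5, 16])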
