From a7cf95a609a09dfb01558dc946b1cd1dfe0f5249 Mon Sep 17 00:00:00 2001
From: pritam <9958665+pritamdamania87@users.noreply.github.com>
Date: Wed, 18 May 2022 17:37:55 +0000
Subject: [PATCH] Add sharding tests to multigpu-test.sh (#77708)

Summary: These tests were being skipped since they don't run on multigpu jobs.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/77708
Approved by: https://github.com/wanchaol
---
 .jenkins/pytorch/multigpu-test.sh | 20 ++++++++++++++++++++
 test/run_test.py                  |  6 ++++--
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/.jenkins/pytorch/multigpu-test.sh b/.jenkins/pytorch/multigpu-test.sh
index 481619a8dc314d..77fe8b2b0bcfea 100755
--- a/.jenkins/pytorch/multigpu-test.sh
+++ b/.jenkins/pytorch/multigpu-test.sh
@@ -28,4 +28,24 @@ time python test/run_test.py --verbose -i distributed/test_c10d_spawn_nccl
 time python test/run_test.py --verbose -i distributed/test_store
 time python test/run_test.py --verbose -i distributed/test_pg_wrapper
 time python test/run_test.py --verbose -i distributed/rpc/cuda/test_tensorpipe_agent
+time python test/run_test.py --verbose -i distributed/_shard/checkpoint/test_checkpoint
+time python test/run_test.py --verbose -i distributed/_shard/checkpoint/test_file_system_checkpoint
+time python test/run_test.py --verbose -i distributed/_shard/sharding_spec/test_sharding_spec
+time python test/run_test.py --verbose -i distributed/_shard/sharding_plan/test_sharding_plan
+time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_megatron_prototype
+time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_sharded_tensor
+time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_sharded_tensor_reshard
+time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_chunk
+time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_elementwise_ops
+time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_embedding
+time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_embedding_bag
+time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_binary_cmp
+time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_init
+time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_linear
+time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_math_ops
+time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_matrix_ops
+time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_softmax
+time python test/run_test.py --verbose -i distributed/_shard/sharded_optim/test_sharded_optim
+time python test/run_test.py --verbose -i distributed/_shard/test_partial_tensor
+time python test/run_test.py --verbose -i distributed/_shard/test_replicated_tensor
 assert_git_not_dirty
diff --git a/test/run_test.py b/test/run_test.py
index c0ad0a55a02b5a..3f21f4d1aefd60 100644
--- a/test/run_test.py
+++ b/test/run_test.py
@@ -201,6 +201,8 @@ def skip_test_p(name: str) -> bool:
     "distributed/pipeline/sync/test_worker",
     "distributed/elastic/agent/server/test/api_test",
     "distributed/elastic/multiprocessing/api_test",
+    "distributed/_shard/checkpoint/test_checkpoint",
+    "distributed/_shard/checkpoint/test_file_system_checkpoint",
     "distributed/_shard/sharding_spec/test_sharding_spec",
     "distributed/_shard/sharding_plan/test_sharding_plan",
     "distributed/_shard/sharded_tensor/test_megatron_prototype",
@@ -217,7 +219,6 @@ def skip_test_p(name: str) -> bool:
     "distributed/_shard/sharded_tensor/ops/test_matrix_ops",
     "distributed/_shard/sharded_tensor/ops/test_softmax",
     "distributed/_shard/sharded_tensor/ops/test_tensor_ops",
-    "distributed/_shard/sharding_spec/test_sharding_spec",
     "distributed/_shard/sharded_optim/test_sharded_optim",
     "distributed/_shard/test_partial_tensor",
     "distributed/_shard/test_replicated_tensor",
@@ -228,6 +229,8 @@ def skip_test_p(name: str) -> bool:
     "distributed/rpc/test_faulty_agent",
     "distributed/rpc/test_tensorpipe_agent",
     "distributed/rpc/cuda/test_tensorpipe_agent",
+    "distributed/_shard/checkpoint/test_checkpoint",
+    "distributed/_shard/checkpoint/test_file_system_checkpoint",
     "distributed/_shard/sharding_spec/test_sharding_spec",
     "distributed/_shard/sharding_plan/test_sharding_plan",
     "distributed/_shard/sharded_tensor/test_megatron_prototype",
@@ -244,7 +247,6 @@ def skip_test_p(name: str) -> bool:
     "distributed/_shard/sharded_tensor/ops/test_matrix_ops",
     "distributed/_shard/sharded_tensor/ops/test_softmax",
     "distributed/_shard/sharded_tensor/ops/test_tensor_ops",
-    "distributed/_shard/sharding_spec/test_sharding_spec",
     "distributed/_shard/sharded_optim/test_sharded_optim",
     "distributed/_shard/test_partial_tensor",
     "distributed/_shard/test_replicated_tensor",