Skip to content

Commit

Permalink
remove megatron-lm, no longer pip installable (microsoft#3389)
Browse files Browse the repository at this point in the history
* remove megatron-lm, no longer pip installable

* Add skips to tests that require megatron-lm and can't be run currently.

* formatting

* Formatting

---------

Co-authored-by: Logan Adams <[email protected]>
  • Loading branch information
jeffra and loadams authored Apr 28, 2023
1 parent f7d71ec commit a094c97
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 1 deletion.
1 change: 0 additions & 1 deletion requirements/requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ clang-format==16.0.2
docutils<0.18
future
importlib-metadata>=4
megatron-lm==1.1.5
pre-commit>=2.20.0
pytest
pytest-forked
Expand Down
1 change: 1 addition & 0 deletions tests/unit/compression/test_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ def test_linear_layer_compress(self, tmpdir):
assert isinstance(compressed_model.layer[0].attention.self.key, LinearLayer_Compress)
assert isinstance(compressed_model.layer[0].attention.self.value, LinearLayer_Compress)

@pytest.mark.skip(reason="megatron-lm is currently broken so this test cannot be run.")
def test_mpu_compress(self, tmpdir):
if not required_maximum_torch_version(major_version=1, minor_version=13):
pytest.skip("megatron not compatible with torch >1.13")
Expand Down
3 changes: 3 additions & 0 deletions tests/unit/model_parallelism/test_configurable_parallel_mp.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def inputs(self, bs=1, seq_len=20):
class TestConfigurableMP(ConfigurableMP):

@pytest.mark.world_size(1)
@pytest.mark.skip(reason="megatron-lm is currently broken so this test cannot be run.")
def test_gpt2_basic(self, tmpdir, inputs):
args_defaults = {
'num_layers': 2,
Expand Down Expand Up @@ -87,6 +88,7 @@ def test_gpt2_basic(self, tmpdir, inputs):
atol=1e-07), f"Baseline output {baseline} is not equal to save-then-load output {test}"

@pytest.mark.world_size(2)
@pytest.mark.skip(reason="megatron-lm is currently broken so this test cannot be run.")
def test_gpt2_mp2_no_resize(self, tmpdir, inputs):
args_defaults = {
'num_layers': 2,
Expand Down Expand Up @@ -148,6 +150,7 @@ def run(self, inputs, class_tmpdir):
class TestConfigurableResizeMP(ConfigurableMP):
world_size = [1, 4]

@pytest.mark.skip(reason="megatron-lm is currently broken so this test cannot be run.")
def test(self, baseline_mp2, inputs, class_tmpdir):
args_defaults = {
'num_layers': 2,
Expand Down
6 changes: 6 additions & 0 deletions tests/unit/model_parallelism/test_configurable_parallel_pp.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ class TestConfigurablePP(ConfigurablePP):
pp_size = 2
world_size = 4 # mp_size * pp_size

@pytest.mark.skip(reason="megatron-lm is currently broken so this test cannot be run.")
def test_pp_basic(self, inputs, tmpdir):
# basic test case, mp_size=2, pp_size=2, verify ckpt saving/loading.
args_defaults = {
Expand Down Expand Up @@ -234,30 +235,35 @@ def _test(self, inputs, class_tmpdir, checkpoint_tag, mp_size, pp_size, mp_resiz
# These tests are divided by baseline model worldsize and test model worldsize
@pytest.mark.world_size(1)
@pytest.mark.parametrize("mp_size, pp_size, mp_resize, pp_resize", [(1, 2, 1, 1)])
@pytest.mark.skip(reason="megatron-lm is currently broken so this test cannot be run.")
def test_world_size_2to1(self, inputs, class_tmpdir, checkpoint_tag, baseline_ws2, mp_size, pp_size, mp_resize,
pp_resize):
self._test(inputs, class_tmpdir, checkpoint_tag, mp_size, pp_size, mp_resize, pp_resize)

@pytest.mark.world_size(1)
@pytest.mark.parametrize("mp_size, pp_size, mp_resize, pp_resize", [(2, 2, 1, 1)])
@pytest.mark.skip(reason="megatron-lm is currently broken so this test cannot be run.")
def test_world_size_4to1(self, inputs, class_tmpdir, checkpoint_tag, baseline_ws4, mp_size, pp_size, mp_resize,
pp_resize):
self._test(inputs, class_tmpdir, checkpoint_tag, mp_size, pp_size, mp_resize, pp_resize)

@pytest.mark.world_size(2)
@pytest.mark.parametrize("mp_size, pp_size, mp_resize, pp_resize", [(2, 2, 2, 1)])
@pytest.mark.skip(reason="megatron-lm is currently broken so this test cannot be run.")
def test_world_size_4to2(self, inputs, class_tmpdir, checkpoint_tag, baseline_ws4, mp_size, pp_size, mp_resize,
pp_resize):
self._test(inputs, class_tmpdir, checkpoint_tag, mp_size, pp_size, mp_resize, pp_resize)

@pytest.mark.world_size(4)
@pytest.mark.parametrize("mp_size, pp_size, mp_resize, pp_resize", [(1, 1, 2, 2)])
@pytest.mark.skip(reason="megatron-lm is currently broken so this test cannot be run.")
def test_world_size_1to4(self, inputs, class_tmpdir, checkpoint_tag, baseline_ws1, mp_size, pp_size, mp_resize,
pp_resize):
self._test(inputs, class_tmpdir, checkpoint_tag, mp_size, pp_size, mp_resize, pp_resize)

@pytest.mark.world_size(4)
@pytest.mark.parametrize("mp_size, pp_size, mp_resize, pp_resize", [(1, 2, 1, 4), (2, 1, 2, 2)])
@pytest.mark.skip(reason="megatron-lm is currently broken so this test cannot be run.")
def test_world_size_2to4(self, inputs, class_tmpdir, checkpoint_tag, baseline_ws2, mp_size, pp_size, mp_resize,
pp_resize):
self._test(inputs, class_tmpdir, checkpoint_tag, mp_size, pp_size, mp_resize, pp_resize)

0 comments on commit a094c97

Please sign in to comment.