Tests: Pythia-70m --> Pythia-14m (Lightning-AI#784)
Andrei-Aksionov authored Nov 27, 2023
1 parent 16b9e35 commit c72c562
Showing 10 changed files with 36 additions and 36 deletions.
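
All ten files make the same substitution: the test model switches from Pythia-70m to the much smaller Pythia-14m, which (per the updated assertions below) shrinks the default block size from 2048 to 512 and the parameter count from 70,426,624 to 14,067,712. A minimal sketch, assuming lit-gpt's `Config`, `GPT`, and `num_parameters` APIs as used in these tests, to confirm those numbers locally:

```python
# Sketch: compare the two Pythia presets; expected values mirror the
# updated assertions in this diff (block_size and parameter count).
from lit_gpt import Config, GPT
from lit_gpt.utils import num_parameters

for name in ("pythia-70m", "pythia-14m"):
    config = Config.from_name(name)
    model = GPT(config)
    print(f"{name}: block_size={config.block_size}, params={num_parameters(model):,}")
# pythia-70m: block_size=2048, params=70,426,624
# pythia-14m: block_size=512,  params=14,067,712
```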
6 changes: 3 additions & 3 deletions tests/test_adapter.py
@@ -12,7 +12,7 @@ def test_config_identical():
import lit_gpt.adapter as gpt_adapter
import lit_gpt.model as gpt

-name = "pythia-70m"
+name = "pythia-14m"
base_config = asdict(gpt.Config.from_name(name))
adapter_config = asdict(gpt_adapter.Config.from_name(name))
del adapter_config["adapter_prompt_length"]
@@ -29,7 +29,7 @@ def test_adapter_filter(tmp_path):
from lit_gpt.adapter import GPT, adapter_filter

fabric = Fabric(devices=1)
-model = GPT.from_name("pythia-70m", n_layer=4)
+model = GPT.from_name("pythia-14m", n_layer=4)
save_path = tmp_path / "model.pth"
fabric.save(save_path, {"model": model}, filter={"model": adapter_filter})
saved = torch.load(save_path)["model"]
@@ -110,7 +110,7 @@ def test_adapter_gpt_init_weights():
def test_adapter_compile():
from lit_gpt.adapter import GPT

-model = GPT.from_name("pythia-70m", n_layer=3)
+model = GPT.from_name("pythia-14m", n_layer=3)
x = torch.randint(model.config.vocab_size, size=(2, model.config.block_size), dtype=torch.int64)

from torch._dynamo.backends import debugging
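The adapter tests above save checkpoints through Fabric's `filter` argument so that only adapter weights are written to disk. A minimal sketch of that pattern, assuming `lit_gpt.adapter`'s `GPT` and `adapter_filter` behave as exercised in `tests/test_adapter.py`:

```python
# Sketch: persist only adapter-specific tensors via Fabric's save filter.
import torch
from lightning import Fabric
from lit_gpt.adapter import GPT, adapter_filter

fabric = Fabric(devices=1)
model = GPT.from_name("pythia-14m", n_layer=4)
fabric.save("adapter.pth", {"model": model}, filter={"model": adapter_filter})

saved = torch.load("adapter.pth")["model"]
assert all(adapter_filter(k, v) for k, v in saved.items())  # adapter weights only
```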
6 changes: 3 additions & 3 deletions tests/test_adapter_v2.py
@@ -20,7 +20,7 @@ def test_config_identical():
import lit_gpt.adapter_v2 as gpt_adapter
import lit_gpt.model as gpt

-name = "pythia-70m"
+name = "pythia-14m"
with Fabric(accelerator="cpu").init_module(empty_init=True):
base_model = gpt.GPT.from_name(name)
adapter_model = gpt_adapter.GPT.from_name(name)
@@ -35,7 +35,7 @@ def test_adapter_v2_filter(tmp_path):
from lit_gpt.adapter_v2 import GPT, adapter_filter

fabric = Fabric(devices=1)
-model = GPT.from_name("pythia-70m", n_layer=3)
+model = GPT.from_name("pythia-14m", n_layer=3)
save_path = tmp_path / "model.pth"
fabric.save(save_path, {"model": model}, filter={"model": adapter_filter})
saved = torch.load(save_path)["model"]
@@ -150,7 +150,7 @@ def test_base_model_can_be_adapter_v2_loaded(name):
def test_adapter_v2_compile():
from lit_gpt.adapter_v2 import GPT

-model = GPT.from_name("pythia-70m", n_layer=3)
+model = GPT.from_name("pythia-14m", n_layer=3)
x = torch.randint(model.config.vocab_size, size=(2, model.config.block_size), dtype=torch.int64)

from torch._dynamo.backends import debugging
30 changes: 15 additions & 15 deletions tests/test_config.py
@@ -21,20 +21,20 @@ def test_config():
config = Config(block_size=2048)
assert config.block_size == 2048

-config = Config.from_name("pythia-70m")
-assert config.block_size == 2048
+config = Config.from_name("pythia-14m")
+assert config.block_size == 512

-config = Config.from_name("pythia-70m", block_size=4096)
+config = Config.from_name("pythia-14m", block_size=4096)
assert config.block_size == 4096

-config = Config(hf_config={"name": "pythia-70m"})
-assert config.name == "pythia-70m"
+config = Config(hf_config={"name": "pythia-14m"})
+assert config.name == "pythia-14m"


def test_legacy_args(tmp_path):
from lit_gpt import Config

-config = Config.from_name("pythia-70m", condense_ratio=2)
+config = Config.from_name("pythia-14m", condense_ratio=2)
assert not hasattr(config, "condense_ratio")
assert config.rope_condense_ratio == 2

@@ -103,25 +103,25 @@ def test_from_checkpoint(tmp_path):
Config.from_checkpoint(tmp_path / "non_existing_checkpoint")

# 2. If `lit_config.py` doesn't exists, but there is a matching config in `lit_gpt/config.py`.
-config = Config.from_checkpoint(tmp_path / "pythia-70m")
-assert config.name == "pythia-70m"
-assert config.block_size == 2048
+config = Config.from_checkpoint(tmp_path / "pythia-14m")
+assert config.name == "pythia-14m"
+assert config.block_size == 512
assert config.n_layer == 6

# 3. If only `lit_config.py` exists.
-config_data = {"name": "pythia-70m", "block_size": 24, "n_layer": 2}
+config_data = {"name": "pythia-14m", "block_size": 24, "n_layer": 2}
with open(tmp_path / "lit_config.json", "w") as file:
json.dump(config_data, file)
config = Config.from_checkpoint(tmp_path)
-assert config.name == "pythia-70m"
+assert config.name == "pythia-14m"
assert config.block_size == 24
assert config.n_layer == 2

# 4. Both `lit_config.py` and a matching config exist, but `lit_config.py` supersedes matching config
-(tmp_path / "pythia-70m").mkdir()
-with open(tmp_path / "pythia-70m/lit_config.json", "w") as file:
+(tmp_path / "pythia-14m").mkdir()
+with open(tmp_path / "pythia-14m/lit_config.json", "w") as file:
json.dump(config_data, file)
-config = Config.from_checkpoint(tmp_path / "pythia-70m")
-assert config.name == "pythia-70m"
+config = Config.from_checkpoint(tmp_path / "pythia-14m")
+assert config.name == "pythia-14m"
assert config.block_size == 24
assert config.n_layer == 2
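
The `test_from_checkpoint` changes above exercise the resolution order: a `lit_config.json` inside the checkpoint directory supersedes the built-in preset of the same name. A short sketch of that precedence (hypothetical path; assumes `Config.from_checkpoint` behaves as the test asserts):

```python
# Sketch: lit_config.json in the checkpoint dir overrides the built-in preset.
import json
from pathlib import Path
from lit_gpt import Config

ckpt = Path("checkpoints/pythia-14m")  # hypothetical checkpoint directory
ckpt.mkdir(parents=True, exist_ok=True)
with open(ckpt / "lit_config.json", "w") as f:
    json.dump({"name": "pythia-14m", "block_size": 24, "n_layer": 2}, f)

config = Config.from_checkpoint(ckpt)
assert config.block_size == 24  # from lit_config.json, not the preset's 512
```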
4 changes: 2 additions & 2 deletions tests/test_convert_hf_checkpoint.py
@@ -102,12 +102,12 @@ def test_convert_hf_checkpoint(tmp_path):
from scripts.convert_hf_checkpoint import convert_hf_checkpoint

with pytest.raises(ValueError, match="to contain .bin"):
-convert_hf_checkpoint(checkpoint_dir=tmp_path, model_name="pythia-70m")
+convert_hf_checkpoint(checkpoint_dir=tmp_path, model_name="pythia-14m")

bin_file = tmp_path / "foo.bin"
bin_file.touch()
with mock.patch("scripts.convert_hf_checkpoint.lazy_load") as load:
-convert_hf_checkpoint(checkpoint_dir=tmp_path, model_name="pythia-70m")
+convert_hf_checkpoint(checkpoint_dir=tmp_path, model_name="pythia-14m")
load.assert_called_with(bin_file)

assert {p.name for p in tmp_path.glob("*")} == {"foo.bin", "lit_config.json", "lit_model.pth"}
2 changes: 1 addition & 1 deletion tests/test_gptq.py
@@ -15,7 +15,7 @@ def test_gptq_blockwise_quantization():

fabric = L.Fabric(devices=1)
with fabric.init_module(empty_init=False):
-model = GPT.from_name("pythia-70m", n_layer=2)
+model = GPT.from_name("pythia-14m", n_layer=2)
x = torch.randint(0, 10, (2, model.config.block_size))

from quantize.gptq import blockwise_quantization
8 changes: 4 additions & 4 deletions tests/test_lm_eval_harness.py
@@ -18,9 +18,9 @@ def test_run_eval(tmp_path, float_like):

fabric = Fabric(devices=1)
with fabric.init_module():
-model = GPT.from_name("pythia-70m")
-download_from_hub(repo_id="EleutherAI/pythia-70m", tokenizer_only=True, checkpoint_dir=tmp_path)
-tokenizer = Tokenizer(tmp_path / "EleutherAI/pythia-70m")
+model = GPT.from_name("pythia-14m")
+download_from_hub(repo_id="EleutherAI/pythia-14m", tokenizer_only=True, checkpoint_dir=tmp_path)
+tokenizer = Tokenizer(tmp_path / "EleutherAI/pythia-14m")

eval_harness = EvalHarnessBase(fabric, model, tokenizer, 1)
results = eval_harness.run_eval(
@@ -32,7 +32,7 @@ def test_run_eval(tmp_path, float_like):
"bootstrap_iters": 2,
"device": ANY,
"limit": 2,
-"model": "pythia-70m",
+"model": "pythia-14m",
"no_cache": True,
"num_fewshot": 0,
},
4 changes: 2 additions & 2 deletions tests/test_lora.py
@@ -131,7 +131,7 @@ def test_lora_filter(tmp_path):
from lit_gpt.lora import GPT, lora_filter

fabric = Fabric(devices=1)
-model = GPT.from_name("pythia-70m", n_layer=3, r=1, to_query=True, to_value=True)
+model = GPT.from_name("pythia-14m", n_layer=3, r=1, to_query=True, to_value=True)
save_path = tmp_path / "model.pth"
fabric.save(save_path, {"model": model}, filter={"model": lora_filter})
saved = torch.load(save_path)["model"]
@@ -463,7 +463,7 @@ def test_lora_compile():
from lit_gpt.lora import GPT

model = GPT.from_name(
-"pythia-70m",
+"pythia-14m",
n_layer=3,
r=8,
alpha=8,
4 changes: 2 additions & 2 deletions tests/test_merge_lora.py
@@ -14,11 +14,11 @@ def test_merge_lora(tmp_path, fake_checkpoint_dir):
config = dict(block_size=128, padded_vocab_size=256, n_layer=3, n_head=8, n_embd=16)
with open(fake_checkpoint_dir / "lit_config.json", "w") as fp:
json.dump(config, fp)
-base_model = GPT.from_name("pythia-70m", **config)
+base_model = GPT.from_name("pythia-14m", **config)
state_dict = base_model.state_dict()
assert len(state_dict) == 40
torch.save(state_dict, fake_checkpoint_dir / "lit_model.pth")
-lora_model = LoRAGPT.from_name("pythia-70m", **config, r=8, alpha=16, dropout=0.05, to_query=True, to_value=True)
+lora_model = LoRAGPT.from_name("pythia-14m", **config, r=8, alpha=16, dropout=0.05, to_query=True, to_value=True)
state_dict = {k: v for k, v in lora_model.state_dict().items() if lora_filter(k, v)}
assert len(state_dict) == 6
lora_path = tmp_path / "lora"
4 changes: 2 additions & 2 deletions tests/test_model.py
@@ -388,7 +388,7 @@ def test_against_hf_mistral(device, dtype):
def test_model_compile():
from lit_gpt import GPT

-model = GPT.from_name("pythia-70m", n_layer=3)
+model = GPT.from_name("pythia-14m", n_layer=3)
x = torch.randint(model.config.vocab_size, size=(2, model.config.block_size), dtype=torch.int64)

from torch._dynamo.backends import debugging
@@ -448,7 +448,7 @@ def generate(logits):
def test_model_kv_cache_amp():
from lit_gpt.model import GPT, Config

-config = Config.from_name("pythia-70m", n_layer=2)
+config = Config.from_name("pythia-14m", n_layer=2)
model = GPT(config)
encoded = torch.arange(45)
model.set_kv_cache(batch_size=1)
4 changes: 2 additions & 2 deletions tests/test_utils.py
@@ -163,5 +163,5 @@ def test_num_parameters_bitsandbytes(mode):
assert num_parameters(model) == 110

with fabric.init_module(empty_init=True):
-model = GPT.from_name("pythia-70m")
-assert num_parameters(model) == 70426624
+model = GPT.from_name("pythia-14m")
+assert num_parameters(model) == 14067712
