Tests: Pythia-70m --> Pythia-14m (Lightning-AI#784)
Andrei-Aksionov authored Nov 27, 2023
1 parent 16b9e35 commit c72c562
Showing 10 changed files with 36 additions and 36 deletions.
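
All ten files make the same substitution: the test model switches from Pythia-70m to the much smaller Pythia-14m, which (per the updated assertions below) shrinks the default block size from 2048 to 512 and the parameter count from 70,426,624 to 14,067,712. A minimal sketch, assuming lit-gpt's `Config`, `GPT`, and `num_parameters` APIs as used in these tests, to confirm those numbers locally:

```python
# Sketch: compare the two Pythia presets; expected values mirror the
# updated assertions in this diff (block_size and parameter count).
from lit_gpt import Config, GPT
from lit_gpt.utils import num_parameters

for name in ("pythia-70m", "pythia-14m"):
    config = Config.from_name(name)
    model = GPT(config)
    print(f"{name}: block_size={config.block_size}, params={num_parameters(model):,}")
# pythia-70m: block_size=2048, params=70,426,624
# pythia-14m: block_size=512,  params=14,067,712
```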
6 changes: 3 additions & 3 deletions tests/test_adapter.py
@@ -12,7 +12,7 @@ def test_config_identical():
import lit_gpt.adapter as gpt_adapter
import lit_gpt.model as gpt

-name = "pythia-70m"
+name = "pythia-14m"
base_config = asdict(gpt.Config.from_name(name))
adapter_config = asdict(gpt_adapter.Config.from_name(name))
del adapter_config["adapter_prompt_length"]
@@ -29,7 +29,7 @@ def test_adapter_filter(tmp_path):
from lit_gpt.adapter import GPT, adapter_filter

fabric = Fabric(devices=1)
-model = GPT.from_name("pythia-70m", n_layer=4)
+model = GPT.from_name("pythia-14m", n_layer=4)
save_path = tmp_path / "model.pth"
fabric.save(save_path, {"model": model}, filter={"model": adapter_filter})
saved = torch.load(save_path)["model"]
@@ -110,7 +110,7 @@ def test_adapter_gpt_init_weights():
def test_adapter_compile():
from lit_gpt.adapter import GPT

-model = GPT.from_name("pythia-70m", n_layer=3)
+model = GPT.from_name("pythia-14m", n_layer=3)
x = torch.randint(model.config.vocab_size, size=(2, model.config.block_size), dtype=torch.int64)

from torch._dynamo.backends import debugging
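The adapter tests above save checkpoints through Fabric's `filter` argument so that only adapter weights are written to disk. A minimal sketch of that pattern, assuming `lit_gpt.adapter`'s `GPT` and `adapter_filter` behave as exercised in `tests/test_adapter.py`:

```python
# Sketch: persist only adapter-specific tensors via Fabric's save filter.
import torch
from lightning import Fabric
from lit_gpt.adapter import GPT, adapter_filter

fabric = Fabric(devices=1)
model = GPT.from_name("pythia-14m", n_layer=4)
fabric.save("adapter.pth", {"model": model}, filter={"model": adapter_filter})

saved = torch.load("adapter.pth")["model"]
assert all(adapter_filter(k, v) for k, v in saved.items())  # adapter weights only
```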
6 changes: 3 additions & 3 deletions tests/test_adapter_v2.py
@@ -20,7 +20,7 @@ def test_config_identical():
import lit_gpt.adapter_v2 as gpt_adapter
import lit_gpt.model as gpt

-name = "pythia-70m"
+name = "pythia-14m"
with Fabric(accelerator="cpu").init_module(empty_init=True):
base_model = gpt.GPT.from_name(name)
adapter_model = gpt_adapter.GPT.from_name(name)
@@ -35,7 +35,7 @@ def test_adapter_v2_filter(tmp_path):
from lit_gpt.adapter_v2 import GPT, adapter_filter

fabric = Fabric(devices=1)
-model = GPT.from_name("pythia-70m", n_layer=3)
+model = GPT.from_name("pythia-14m", n_layer=3)
save_path = tmp_path / "model.pth"
fabric.save(save_path, {"model": model}, filter={"model": adapter_filter})
saved = torch.load(save_path)["model"]
@@ -150,7 +150,7 @@ def test_base_model_can_be_adapter_v2_loaded(name):
def test_adapter_v2_compile():
from lit_gpt.adapter_v2 import GPT

-model = GPT.from_name("pythia-70m", n_layer=3)
+model = GPT.from_name("pythia-14m", n_layer=3)
x = torch.randint(model.config.vocab_size, size=(2, model.config.block_size), dtype=torch.int64)

from torch._dynamo.backends import debugging
30 changes: 15 additions & 15 deletions tests/test_config.py
@@ -21,20 +21,20 @@ def test_config():
config = Config(block_size=2048)
assert config.block_size == 2048

-config = Config.from_name("pythia-70m")
-assert config.block_size == 2048
+config = Config.from_name("pythia-14m")
+assert config.block_size == 512

-config = Config.from_name("pythia-70m", block_size=4096)
+config = Config.from_name("pythia-14m", block_size=4096)
assert config.block_size == 4096

-config = Config(hf_config={"name": "pythia-70m"})
-assert config.name == "pythia-70m"
+config = Config(hf_config={"name": "pythia-14m"})
+assert config.name == "pythia-14m"


def test_legacy_args(tmp_path):
from lit_gpt import Config

-config = Config.from_name("pythia-70m", condense_ratio=2)
+config = Config.from_name("pythia-14m", condense_ratio=2)
assert not hasattr(config, "condense_ratio")
assert config.rope_condense_ratio == 2

@@ -103,25 +103,25 @@ def test_from_checkpoint(tmp_path):
Config.from_checkpoint(tmp_path / "non_existing_checkpoint")

# 2. If `lit_config.py` doesn't exists, but there is a matching config in `lit_gpt/config.py`.
-config = Config.from_checkpoint(tmp_path / "pythia-70m")
-assert config.name == "pythia-70m"
-assert config.block_size == 2048
+config = Config.from_checkpoint(tmp_path / "pythia-14m")
+assert config.name == "pythia-14m"
+assert config.block_size == 512
assert config.n_layer == 6

# 3. If only `lit_config.py` exists.
-config_data = {"name": "pythia-70m", "block_size": 24, "n_layer": 2}
+config_data = {"name": "pythia-14m", "block_size": 24, "n_layer": 2}
with open(tmp_path / "lit_config.json", "w") as file:
json.dump(config_data, file)
config = Config.from_checkpoint(tmp_path)
-assert config.name == "pythia-70m"
+assert config.name == "pythia-14m"
assert config.block_size == 24
assert config.n_layer == 2

# 4. Both `lit_config.py` and a matching config exist, but `lit_config.py` supersedes matching config
-(tmp_path / "pythia-70m").mkdir()
-with open(tmp_path / "pythia-70m/lit_config.json", "w") as file:
+(tmp_path / "pythia-14m").mkdir()
+with open(tmp_path / "pythia-14m/lit_config.json", "w") as file:
json.dump(config_data, file)
-config = Config.from_checkpoint(tmp_path / "pythia-70m")
-assert config.name == "pythia-70m"
+config = Config.from_checkpoint(tmp_path / "pythia-14m")
+assert config.name == "pythia-14m"
assert config.block_size == 24
assert config.n_layer == 2
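
The `test_from_checkpoint` changes above exercise the resolution order: a `lit_config.json` inside the checkpoint directory supersedes the built-in preset of the same name. A short sketch of that precedence (hypothetical path; assumes `Config.from_checkpoint` behaves as the test asserts):

```python
# Sketch: lit_config.json in the checkpoint dir overrides the built-in preset.
import json
from pathlib import Path
from lit_gpt import Config

ckpt = Path("checkpoints/pythia-14m")  # hypothetical checkpoint directory
ckpt.mkdir(parents=True, exist_ok=True)
with open(ckpt / "lit_config.json", "w") as f:
    json.dump({"name": "pythia-14m", "block_size": 24, "n_layer": 2}, f)

config = Config.from_checkpoint(ckpt)
assert config.block_size == 24  # from lit_config.json, not the preset's 512
```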
4 changes: 2 additions & 2 deletions tests/test_convert_hf_checkpoint.py
@@ -102,12 +102,12 @@ def test_convert_hf_checkpoint(tmp_path):
from scripts.convert_hf_checkpoint import convert_hf_checkpoint

with pytest.raises(ValueError, match="to contain .bin"):
-convert_hf_checkpoint(checkpoint_dir=tmp_path, model_name="pythia-70m")
+convert_hf_checkpoint(checkpoint_dir=tmp_path, model_name="pythia-14m")

bin_file = tmp_path / "foo.bin"
bin_file.touch()
with mock.patch("scripts.convert_hf_checkpoint.lazy_load") as load:
-convert_hf_checkpoint(checkpoint_dir=tmp_path, model_name="pythia-70m")
+convert_hf_checkpoint(checkpoint_dir=tmp_path, model_name="pythia-14m")
load.assert_called_with(bin_file)

assert {p.name for p in tmp_path.glob("*")} == {"foo.bin", "lit_config.json", "lit_model.pth"}
2 changes: 1 addition & 1 deletion tests/test_gptq.py
@@ -15,7 +15,7 @@ def test_gptq_blockwise_quantization():

fabric = L.Fabric(devices=1)
with fabric.init_module(empty_init=False):
-model = GPT.from_name("pythia-70m", n_layer=2)
+model = GPT.from_name("pythia-14m", n_layer=2)
x = torch.randint(0, 10, (2, model.config.block_size))

from quantize.gptq import blockwise_quantization
8 changes: 4 additions & 4 deletions tests/test_lm_eval_harness.py
@@ -18,9 +18,9 @@ def test_run_eval(tmp_path, float_like):

fabric = Fabric(devices=1)
with fabric.init_module():
-model = GPT.from_name("pythia-70m")
-download_from_hub(repo_id="EleutherAI/pythia-70m", tokenizer_only=True, checkpoint_dir=tmp_path)
-tokenizer = Tokenizer(tmp_path / "EleutherAI/pythia-70m")
+model = GPT.from_name("pythia-14m")
+download_from_hub(repo_id="EleutherAI/pythia-14m", tokenizer_only=True, checkpoint_dir=tmp_path)
+tokenizer = Tokenizer(tmp_path / "EleutherAI/pythia-14m")

eval_harness = EvalHarnessBase(fabric, model, tokenizer, 1)
results = eval_harness.run_eval(
@@ -32,7 +32,7 @@ def test_run_eval(tmp_path, float_like):
"bootstrap_iters": 2,
"device": ANY,
"limit": 2,
-"model": "pythia-70m",
+"model": "pythia-14m",
"no_cache": True,
"num_fewshot": 0,
},
4 changes: 2 additions & 2 deletions tests/test_lora.py
@@ -131,7 +131,7 @@ def test_lora_filter(tmp_path):
from lit_gpt.lora import GPT, lora_filter

fabric = Fabric(devices=1)
-model = GPT.from_name("pythia-70m", n_layer=3, r=1, to_query=True, to_value=True)
+model = GPT.from_name("pythia-14m", n_layer=3, r=1, to_query=True, to_value=True)
save_path = tmp_path / "model.pth"
fabric.save(save_path, {"model": model}, filter={"model": lora_filter})
saved = torch.load(save_path)["model"]
@@ -463,7 +463,7 @@ def test_lora_compile():
from lit_gpt.lora import GPT

model = GPT.from_name(
-"pythia-70m",
+"pythia-14m",
n_layer=3,
r=8,
alpha=8,
4 changes: 2 additions & 2 deletions tests/test_merge_lora.py
@@ -14,11 +14,11 @@ def test_merge_lora(tmp_path, fake_checkpoint_dir):
config = dict(block_size=128, padded_vocab_size=256, n_layer=3, n_head=8, n_embd=16)
with open(fake_checkpoint_dir / "lit_config.json", "w") as fp:
json.dump(config, fp)
-base_model = GPT.from_name("pythia-70m", **config)
+base_model = GPT.from_name("pythia-14m", **config)
state_dict = base_model.state_dict()
assert len(state_dict) == 40
torch.save(state_dict, fake_checkpoint_dir / "lit_model.pth")
-lora_model = LoRAGPT.from_name("pythia-70m", **config, r=8, alpha=16, dropout=0.05, to_query=True, to_value=True)
+lora_model = LoRAGPT.from_name("pythia-14m", **config, r=8, alpha=16, dropout=0.05, to_query=True, to_value=True)
state_dict = {k: v for k, v in lora_model.state_dict().items() if lora_filter(k, v)}
assert len(state_dict) == 6
lora_path = tmp_path / "lora"
4 changes: 2 additions & 2 deletions tests/test_model.py
@@ -388,7 +388,7 @@ def test_against_hf_mistral(device, dtype):
def test_model_compile():
from lit_gpt import GPT

-model = GPT.from_name("pythia-70m", n_layer=3)
+model = GPT.from_name("pythia-14m", n_layer=3)
x = torch.randint(model.config.vocab_size, size=(2, model.config.block_size), dtype=torch.int64)

from torch._dynamo.backends import debugging
@@ -448,7 +448,7 @@ def generate(logits):
def test_model_kv_cache_amp():
from lit_gpt.model import GPT, Config

-config = Config.from_name("pythia-70m", n_layer=2)
+config = Config.from_name("pythia-14m", n_layer=2)
model = GPT(config)
encoded = torch.arange(45)
model.set_kv_cache(batch_size=1)
4 changes: 2 additions & 2 deletions tests/test_utils.py
@@ -163,5 +163,5 @@ def test_num_parameters_bitsandbytes(mode):
assert num_parameters(model) == 110

with fabric.init_module(empty_init=True):
-model = GPT.from_name("pythia-70m")
-assert num_parameters(model) == 70426624
+model = GPT.from_name("pythia-14m")
+assert num_parameters(model) == 14067712
