Add CI tests for chapter 7 (rasbt#239)
rasbt authored Jun 22, 2024
1 parent b90c7ad commit ec5baa1
Showing 3 changed files with 81 additions and 23 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -1,5 +1,4 @@
 # Configs and keys
-ch07/01_main-chapter-code/gpt2-medium355M-sft-standalone.pth
 ch07/02_dataset-utilities/config.json
 ch07/03_model-evaluation/config.json

@@ -36,6 +35,8 @@ ch06/02_bonus_additional-experiments/gpt2
 ch06/03_bonus_imdb-classification/gpt2

 ch07/01_main-chapter-code/gpt2-medium355M-sft.pth
+ch07/01_main-chapter-code/gpt2-medium355M-sft-standalone.pth
+ch07/01_main-chapter-code/Smalltestmodel-sft-standalone.pth
 ch07/01_main-chapter-code/gpt2/

 # Datasets
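The new ignore entries match checkpoint files that test runs write to disk. As an aside, a minimal sketch of the presumed naming convention (an assumption based on the chapter's practice of deriving the filename from CHOOSE_MODEL by stripping spaces and parentheses; the -standalone suffix would come from the standalone-notebook variant):

    import re

    # Illustrative only: derive the checkpoint stem from the model name
    for choose_model in ("gpt2-medium (355M)", "Small test model"):
        print(re.sub(r"[ ()]", "", choose_model) + "-sft.pth")
    # gpt2-medium355M-sft.pth
    # Smalltestmodel-sft.pth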
85 changes: 63 additions & 22 deletions ch07/01_main-chapter-code/gpt_instruction_finetuning.py
@@ -147,7 +147,7 @@ def plot_losses(epochs_seen, tokens_seen, train_losses, val_losses):
     # plt.show()


-def main():
+def main(test_mode=False):
     #######################################
     # Print package versions
     #######################################
@@ -177,6 +177,12 @@ def main():
     test_data = data[train_portion:train_portion + test_portion]
     val_data = data[train_portion + test_portion:]

+    # Use a very small subset for testing purposes
+    if test_mode:
+        train_data = train_data[:10]
+        val_data = val_data[:10]
+        test_data = test_data[:10]
+
     print("Training set length:", len(train_data))
     print("Validation set length:", len(val_data))
     print("Test set length:", len(test_data))
@@ -217,31 +223,50 @@ def main():
     #######################################
     # Load pretrained model
     #######################################
-    BASE_CONFIG = {
-        "vocab_size": 50257,     # Vocabulary size
-        "context_length": 1024,  # Context length
-        "drop_rate": 0.0,        # Dropout rate
-        "qkv_bias": True         # Query-key-value bias
-    }
-
-    model_configs = {
-        "gpt2-small (124M)": {"emb_dim": 768, "n_layers": 12, "n_heads": 12},
-        "gpt2-medium (355M)": {"emb_dim": 1024, "n_layers": 24, "n_heads": 16},
-        "gpt2-large (774M)": {"emb_dim": 1280, "n_layers": 36, "n_heads": 20},
-        "gpt2-xl (1558M)": {"emb_dim": 1600, "n_layers": 48, "n_heads": 25},
-    }
+    # Small GPT model for testing purposes
+    if test_mode:
+        BASE_CONFIG = {
+            "vocab_size": 50257,
+            "context_length": 120,
+            "drop_rate": 0.0,
+            "qkv_bias": False,
+            "emb_dim": 12,
+            "n_layers": 1,
+            "n_heads": 2
+        }
+        model = GPTModel(BASE_CONFIG)
+        model.eval()
+        device = "cpu"
+        CHOOSE_MODEL = "Small test model"
+
+    # Code as it is used in the main chapter
+    else:
+        BASE_CONFIG = {
+            "vocab_size": 50257,     # Vocabulary size
+            "context_length": 1024,  # Context length
+            "drop_rate": 0.0,        # Dropout rate
+            "qkv_bias": True         # Query-key-value bias
+        }
+
+        model_configs = {
+            "gpt2-small (124M)": {"emb_dim": 768, "n_layers": 12, "n_heads": 12},
+            "gpt2-medium (355M)": {"emb_dim": 1024, "n_layers": 24, "n_heads": 16},
+            "gpt2-large (774M)": {"emb_dim": 1280, "n_layers": 36, "n_heads": 20},
+            "gpt2-xl (1558M)": {"emb_dim": 1600, "n_layers": 48, "n_heads": 25},
+        }

-    CHOOSE_MODEL = "gpt2-medium (355M)"
+        CHOOSE_MODEL = "gpt2-medium (355M)"

-    BASE_CONFIG.update(model_configs[CHOOSE_MODEL])
+        BASE_CONFIG.update(model_configs[CHOOSE_MODEL])

-    model_size = CHOOSE_MODEL.split(" ")[-1].lstrip("(").rstrip(")")
-    settings, params = download_and_load_gpt2(model_size=model_size, models_dir="gpt2")
+        model_size = CHOOSE_MODEL.split(" ")[-1].lstrip("(").rstrip(")")
+        settings, params = download_and_load_gpt2(model_size=model_size, models_dir="gpt2")

-    model = GPTModel(BASE_CONFIG)
-    load_weights_into_gpt(model, params)
-    model.eval()
-    model.to(device)
+        model = GPTModel(BASE_CONFIG)
+        load_weights_into_gpt(model, params)
+        model.eval()
+        model.to(device)

     print("Loaded model:", CHOOSE_MODEL)
     print(50*"-")
@@ -259,6 +284,7 @@ def main():

     start_time = time.time()
     optimizer = torch.optim.AdamW(model.parameters(), lr=0.00005, weight_decay=0.1)
+
     num_epochs = 2

     torch.manual_seed(123)
@@ -307,4 +333,19 @@ def main():


 if __name__ == "__main__":
-    main()
+
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description="Finetune a GPT model for instruction following"
+    )
+    parser.add_argument(
+        "--test_mode",
+        default=False,
+        action="store_true",
+        help=("This flag runs the model in test mode for internal testing purposes. "
+              "Otherwise, it runs the model as it is used in the chapter (recommended).")
+    )
+    args = parser.parse_args()
+
+    main(args.test_mode)
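For reference, a small standalone sketch (illustrative, not part of the commit) of the flag's semantics: with action="store_true", the option's mere presence on the command line sets the value to True, so the chapter-faithful run needs no extra arguments:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--test_mode", default=False, action="store_true")

    print(parser.parse_args([]).test_mode)               # False
    print(parser.parse_args(["--test_mode"]).test_mode)  # True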
16 changes: 16 additions & 0 deletions ch07/01_main-chapter-code/tests.py
@@ -0,0 +1,16 @@
+# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).
+# Source for "Build a Large Language Model From Scratch"
+# - https://www.manning.com/books/build-a-large-language-model-from-scratch
+# Code: https://github.com/rasbt/LLMs-from-scratch
+
+# File for internal use (unit tests)
+
+
+import subprocess
+
+
+def test_gpt_instruction_finetune():
+    command = ["python", "ch07/01_main-chapter-code/gpt_instruction_finetuning.py", "--test_mode"]
+
+    result = subprocess.run(command, capture_output=True, text=True)
+    assert result.returncode == 0, f"Script exited with errors: {result.stderr}"
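The chapter 6 counterpart, gpt_class_finetune.py, accepts the same --test_mode flag, so both smoke tests could share one parametrized body. A sketch, assuming pytest is the CI runner (suggested by the test_* naming) and that scripts are invoked from the repository root:

    import subprocess

    import pytest


    @pytest.mark.parametrize("script", [
        "ch06/01_main-chapter-code/gpt_class_finetune.py",
        "ch07/01_main-chapter-code/gpt_instruction_finetuning.py",
    ])
    def test_finetune_script_smoke(script):
        # Each script should exit cleanly on its tiny test configuration
        result = subprocess.run(["python", script, "--test_mode"],
                                capture_output=True, text=True)
        assert result.returncode == 0, f"{script} exited with errors: {result.stderr}"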
