add gpt2 training config
karpathy committed Feb 3, 2023
1 parent e170e40 commit f9348f3
Showing 1 changed file with 23 additions and 0 deletions.
config/train_gpt2.py
@@ -0,0 +1,23 @@
# config for training GPT-2 (124M) down to very nice loss of ~2.85 on 1 node of 8X A100 40GB

wandb_log = True
wandb_project = 'owt'
wandb_run_name = 'gpt2-124M'

# these make the total batch size be ~0.5M
# 12 batch size * 1024 block size * 5 grad accum steps * 8 GPUs = 491,520 tokens per step
batch_size = 12
block_size = 1024
gradient_accumulation_steps = 5

# this makes the total number of training tokens ~300B
max_iters = 600000
lr_decay_iters = 600000
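# (491,520 tokens/iter * 600,000 iters ≈ 295B tokens)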

# eval stuff
eval_interval = 1000  # estimate train/val loss every 1000 iters
eval_iters = 200  # average the loss estimate over 200 batches
log_interval = 10  # print a progress line every 10 iters

# weight decay
weight_decay = 1e-1

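For context, this file follows nanoGPT's "poor man's configurator" pattern: train.py defines its hyperparameters as module-level globals and then exec's any config file passed on the command line, so each assignment above simply overrides a default. Below is a minimal sketch of that mechanism together with the batch-size arithmetic from the comments; the default values and the standalone wrapper are illustrative, not the real train.py.

# minimal sketch of the override pattern this config is written for
# (illustrative defaults; the real ones live in nanoGPT's train.py)
batch_size = 64
block_size = 256
gradient_accumulation_steps = 1
max_iters = 5000

# exec the config file in this namespace so its assignments override the defaults
exec(open('config/train_gpt2.py').read())

gpus = 8  # assumption: one node of 8x A100, one DDP process per GPU
tokens_per_iter = batch_size * block_size * gradient_accumulation_steps * gpus
print(f"{tokens_per_iter:,} tokens per iteration")                # 491,520, i.e. ~0.5M
print(f"~{tokens_per_iter * max_iters / 1e9:.0f}B tokens total")  # ~295B, i.e. roughly 300B

In nanoGPT itself none of this wrapper is needed: the file is simply passed to train.py (typically launched with torchrun across the 8 GPUs) and the same overrides are applied there.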