remove weight init, not needed at this scale
karpathy committed Aug 20, 2022
1 parent 0a19a59 commit d26d975
Showing 1 changed file with 0 additions and 15 deletions.
makemore.py

@@ -132,25 +132,10 @@ def __init__(self, config):
         ))
         self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)

-        # init all weights
-        self.apply(self._init_weights)
-
         # report number of parameters (note we don't count the decoder parameters in lm_head)
         n_params = sum(p.numel() for p in self.transformer.parameters())
         print("number of parameters: %.2fM" % (n_params/1e6,))

-    def _init_weights(self, module):
-        # TODO is this function needed?
-        if isinstance(module, nn.Linear):
-            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
-            if module.bias is not None:
-                torch.nn.init.zeros_(module.bias)
-        elif isinstance(module, nn.Embedding):
-            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
-        elif isinstance(module, nn.LayerNorm):
-            torch.nn.init.zeros_(module.bias)
-            torch.nn.init.ones_(module.weight)
-
     def configure_optimizers(self, train_config):
         optimizer = torch.optim.AdamW(self.parameters(), lr=train_config.learning_rate,
                                       betas=train_config.betas, weight_decay=train_config.weight_decay,
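With the explicit _init_weights gone, the model simply keeps PyTorch's stock initialization. A minimal standalone sketch (not part of makemore; the module sizes are arbitrary and for illustration only) of what those defaults look like for the three module types the removed code touched:

import torch
import torch.nn as nn

torch.manual_seed(0)

# PyTorch defaults that now apply in place of the removed _init_weights:
#   nn.Linear    -> Kaiming-uniform weight, uniform bias  (removed code used N(0, 0.02) / zeros)
#   nn.Embedding -> N(0, 1) weight                        (removed code used N(0, 0.02))
#   nn.LayerNorm -> weight = 1, bias = 0                  (same as the removed code)
linear = nn.Linear(64, 64)   # arbitrary sizes, chosen only for this sketch
emb    = nn.Embedding(27, 64)
ln     = nn.LayerNorm(64)

print("linear.weight std: %.3f" % linear.weight.std().item())  # ~0.07, not 0.02
print("emb.weight std:    %.3f" % emb.weight.std().item())     # ~1.0, not 0.02
print("ln.weight unique:  %s" % ln.weight.unique().tolist())   # [1.0]
print("ln.bias unique:    %s" % ln.bias.unique().tolist())     # [0.0]

Only the nn.LayerNorm default matches the removed code exactly; nn.Linear and nn.Embedding fall back to Kaiming-uniform and unit-variance normal init, which, per the commit message, is good enough at this model scale.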
