diff --git a/README.md b/README.md
index fa7ecd2..e5f38bd 100644
--- a/README.md
+++ b/README.md
@@ -234,16 +234,16 @@ class EncoderLayer(nn.Module):
x = self.attention(q=x, k=x, v=x, mask=src_mask)
# 2. add and norm
- x = self.norm1(x + _x)
x = self.dropout1(x)
+ x = self.norm1(x + _x)
# 3. positionwise feed forward network
_x = x
x = self.ffn(x)
# 4. add and norm
- x = self.norm2(x + _x)
x = self.dropout2(x)
+ x = self.norm2(x + _x)
return x
```
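This reordering brings the encoder sub-layers in line with the regularization described in "Attention Is All You Need" (Vaswani et al., 2017): dropout is applied to each sub-layer's *output* before that output is added to the residual branch and normalized, i.e. `LayerNorm(x + Dropout(Sublayer(x)))`. The old order normalized first and then applied dropout to the already-merged stream, so the skip connection itself was also being perturbed. The decoder hunks below apply the same fix to all three of its sub-layers; a runnable sketch of the pattern follows them.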
@@ -298,8 +298,8 @@ class DecoderLayer(nn.Module):
x = self.self_attention(q=dec, k=dec, v=dec, mask=trg_mask)
# 2. add and norm
- x = self.norm1(x + _x)
x = self.dropout1(x)
+ x = self.norm1(x + _x)
if enc is not None:
# 3. compute encoder - decoder attention
@@ -307,16 +307,16 @@ class DecoderLayer(nn.Module):
x = self.enc_dec_attention(q=x, k=enc, v=enc, mask=src_mask)
# 4. add and norm
- x = self.norm2(x + _x)
x = self.dropout2(x)
+ x = self.norm2(x + _x)
# 5. positionwise feed forward network
_x = x
x = self.ffn(x)
# 6. add and norm
- x = self.norm3(x + _x)
x = self.dropout3(x)
+ x = self.norm3(x + _x)
return x
```
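For reference, here is a minimal sketch of the corrected post-LN sub-layer pattern. The `SublayerBlock` wrapper and its parameter names are illustrative only, not part of this repo (`EncoderLayer` and `DecoderLayer` inline the same three steps instead); it assumes standard `torch.nn` modules.

```python
import torch
from torch import nn


class SublayerBlock(nn.Module):
    """Post-LN residual wrapper: LayerNorm(x + Dropout(sublayer(x))).

    Hypothetical helper for illustration; EncoderLayer/DecoderLayer
    in this repo inline the same three steps instead.
    """

    def __init__(self, d_model, drop_prob=0.1):
        super().__init__()
        self.norm = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(p=drop_prob)

    def forward(self, x, sublayer):
        _x = x                    # keep the residual branch untouched
        x = sublayer(x)           # e.g. self-attention or the FFN
        x = self.dropout(x)       # dropout BEFORE the residual add
        return self.norm(x + _x)  # add & norm, as in the paper


# shape check: the wrapper preserves (batch, seq_len, d_model)
block = SublayerBlock(d_model=512)
x = torch.randn(2, 10, 512)
out = block(x, nn.Linear(512, 512))
assert out.shape == x.shape
```

With this order, calling `model.eval()` disables dropout and each block reduces exactly to `LayerNorm(x + Sublayer(x))`, the formulation given in the paper.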