diff --git a/README.md b/README.md
index fa7ecd2..e5f38bd 100644
--- a/README.md
+++ b/README.md
@@ -234,16 +234,16 @@ class EncoderLayer(nn.Module):
         x = self.attention(q=x, k=x, v=x, mask=src_mask)
 
         # 2. add and norm
-        x = self.norm1(x + _x)
         x = self.dropout1(x)
+        x = self.norm1(x + _x)
 
         # 3. positionwise feed forward network
         _x = x
         x = self.ffn(x)
 
         # 4. add and norm
-        x = self.norm2(x + _x)
         x = self.dropout2(x)
+        x = self.norm2(x + _x)
         return x
 ```
 
@@ -298,8 +298,8 @@ class DecoderLayer(nn.Module):
         x = self.self_attention(q=dec, k=dec, v=dec, mask=trg_mask)
 
         # 2. add and norm
-        x = self.norm1(x + _x)
         x = self.dropout1(x)
+        x = self.norm1(x + _x)
 
         if enc is not None:
             # 3. compute encoder - decoder attention
@@ -307,16 +307,16 @@ class DecoderLayer(nn.Module):
             x = self.enc_dec_attention(q=x, k=enc, v=enc, mask=src_mask)
 
             # 4. add and norm
-            x = self.norm2(x + _x)
             x = self.dropout2(x)
+            x = self.norm2(x + _x)
 
         # 5. positionwise feed forward network
         _x = x
         x = self.ffn(x)
 
         # 6. add and norm
-        x = self.norm3(x + _x)
         x = self.dropout3(x)
+        x = self.norm3(x + _x)
         return x
 ```
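
Every hunk above makes the same fix: dropout is applied to the sub-layer output *before* the residual addition and LayerNorm, matching the order in "Attention Is All You Need" (dropout on each sub-layer output, then add & norm). The pre-fix order normalized first and dropped out afterward, so the tensor entering the next sub-layer was no longer the normalized residual sum. A minimal sketch of the corrected post-norm sub-block; the names `PostNormResidual`, `d_model`, and `drop_prob` are illustrative and not part of the repo's code:

```python
import torch
from torch import nn


class PostNormResidual(nn.Module):
    """Hypothetical helper showing the corrected 'add and norm' order:
    dropout on the sub-layer output, then residual add, then LayerNorm."""

    def __init__(self, d_model: int, drop_prob: float = 0.1):
        super().__init__()
        self.norm = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(p=drop_prob)

    def forward(self, x: torch.Tensor, residual: torch.Tensor) -> torch.Tensor:
        x = self.dropout(x)              # 1. regularize the sub-layer output
        return self.norm(x + residual)   # 2. add residual, then normalize


# Usage mirroring the patched EncoderLayer steps 1-2 (hypothetical names):
#   _x = x
#   x = self.attention(q=x, k=x, v=x, mask=src_mask)
#   x = sublayer(x, residual=_x)   # dropout -> add -> norm
```

Factoring the sub-block out this way would make the ordering a single point of truth instead of repeating the dropout/norm pair five times across `EncoderLayer` and `DecoderLayer`, which is how the original bug slipped in.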