Commit 1cc0f18

decoder positional embedding needs to be reapplied https://twitter.co…
lucidrains committed Jan 6, 2022
1 parent 28eaba6 commit 1cc0f18
Showing 2 changed files with 4 additions and 7 deletions.

setup.py: 2 changes (1 addition, 1 deletion)

@@ -3,7 +3,7 @@
 setup(
   name = 'vit-pytorch',
   packages = find_packages(exclude=['examples']),
-  version = '0.26.2',
+  version = '0.26.3',
   license='MIT',
   description = 'Vision Transformer (ViT) - Pytorch',
   author = 'Phil Wang',

vit_pytorch/mae.py: 9 changes (3 additions, 6 deletions)

@@ -14,13 +14,11 @@ def __init__(
         masking_ratio = 0.75,
         decoder_depth = 1,
         decoder_heads = 8,
-        decoder_dim_head = 64,
-        apply_decoder_pos_emb_all = False  # whether to (re)apply decoder positional embedding to encoder unmasked tokens
+        decoder_dim_head = 64
     ):
         super().__init__()
         assert masking_ratio > 0 and masking_ratio < 1, 'masking ratio must be kept between 0 and 1'
         self.masking_ratio = masking_ratio
-        self.apply_decoder_pos_emb_all = apply_decoder_pos_emb_all

         # extract some hyperparameters and functions from encoder (vision transformer to be trained)

@@ -73,10 +71,9 @@ def forward(self, img):

         decoder_tokens = self.enc_to_dec(encoded_tokens)

-        # reapply decoder position embedding to unmasked tokens, if desired
+        # reapply decoder position embedding to unmasked tokens

-        if self.apply_decoder_pos_emb_all:
-            decoder_tokens = decoder_tokens + self.decoder_pos_emb(unmasked_indices)
+        decoder_tokens = decoder_tokens + self.decoder_pos_emb(unmasked_indices)

         # repeat mask tokens for number of masked, and add the positions using the masked indices derived above
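
For context, a minimal sketch of the code path this commit changes, under a simplified stand-in MAE setup: enc_to_dec projects the encoded unmasked tokens down to the decoder width, and since a linear projection carries no positional information, the decoder positional embedding has to be re-added at the unmasked indices. Previously that addition was gated behind the removed apply_decoder_pos_emb_all flag (defaulting to off); this commit makes it unconditional. All sizes and modules below are illustrative assumptions, not the exact objects mae.py constructs.

import torch
import torch.nn as nn

# illustrative sizes only
batch, num_patches, encoder_dim, decoder_dim = 2, 64, 128, 64
masking_ratio = 0.75
num_masked = int(masking_ratio * num_patches)

# stand-ins for the modules MAE builds in __init__
enc_to_dec = nn.Linear(encoder_dim, decoder_dim)
decoder_pos_emb = nn.Embedding(num_patches, decoder_dim)
mask_token = nn.Parameter(torch.randn(decoder_dim))

# choose masked vs. unmasked patch positions by random shuffle
rand_indices = torch.rand(batch, num_patches).argsort(dim = -1)
masked_indices = rand_indices[:, :num_masked]
unmasked_indices = rand_indices[:, num_masked:]

# stand-in for the encoder output on the unmasked patches
encoded_tokens = torch.randn(batch, num_patches - num_masked, encoder_dim)

# project to decoder width, then re-add positional information at the
# unmasked indices (the unconditional addition this commit introduces)
decoder_tokens = enc_to_dec(encoded_tokens)
decoder_tokens = decoder_tokens + decoder_pos_emb(unmasked_indices)

# mask tokens get their positions from the masked indices
mask_tokens = mask_token.expand(batch, num_masked, -1) + decoder_pos_emb(masked_indices)

decoder_input = torch.cat((mask_tokens, decoder_tokens), dim = 1)
print(decoder_input.shape)  # torch.Size([2, 64, 64])

Without the re-added embedding, the unmasked tokens would enter the decoder with no positional signal at all, while the mask tokens would still carry theirs, which is the asymmetry the commit message points to.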
