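Use msrvtt-train-9k annotations for VindLU retrieval training; raise train batch size from 16 to 32, add EmptyCacheHook after each epoch, and remove leftover commented-out memory-debug prints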

open-mmlab · cir7 · Sep 7, 2023 · Aug 2, 2023 · Aug 2, 2023 · Aug 7, 2023
commit 7c40e45d76bdedadfcfa849cad248ebe03dfb9df
diff --git a/configs/multimodal/vindlu/vindlu_ret_train.py b/configs/multimodal/vindlu/vindlu_ret_train.py
@@ -1,7 +1,7 @@
 _base_ = ['../../_base_/default_runtime.py']
 
 video_root = 'data/msrvtt/msrvtt_2fps_224'
-anno_file_train = 'data/msrvtt/anno_downstream/msrvtt_ret_train7k.json'
+anno_file_train = 'data/msrvtt/anno_downstream/msrvtt_ret_train9k.json'
 anno_file_test = 'data/msrvtt/anno_downstream/msrvtt_ret_test1k.json'
 pretrained_ckpt_path = 'checkpoints/5M-pretrain.pth'
 
@@ -106,7 +106,7 @@
 dataset_type = 'MSRVTT_Ret'
 
 train_dataloader = dict(
-    batch_size=16,
+    batch_size=32,
     num_workers=8,
     persistent_workers=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
@@ -177,3 +177,5 @@
 auto_scale_lr = dict(enable=True, base_batch_size=128)
 
 find_unused_parameters=True
+
+custom_hooks = [dict(type='EmptyCacheHook', after_epoch=True)]
diff --git a/mmaction/models/multimodal/vindlu_ret.py b/mmaction/models/multimodal/vindlu_ret.py
@@ -379,12 +379,10 @@ def compute_score_matrix_i2t(self, img_feats, img_embeds, text_feats,
         """
         use_sim = False
         # compute i2t sim matrix
-        # print(f'mem 0 {torch.cuda.memory_allocated()// (1024*1024)}')
         sim_matrix_i2t = img_feats @ text_feats.t()
 
         score_matrix_i2t = torch.full((img_feats.size(0), text_feats.size(0)),
                                       -100.0).to(self.device)
-        # print(f'mem 1 {torch.cuda.memory_allocated()// (1024*1024)}')
         for i in track_on_main_process(
                 range(img_feats.size(0)), 'Compute I2T scores...'):
             sims = sim_matrix_i2t[i]
@@ -408,8 +406,6 @@ def compute_score_matrix_i2t(self, img_feats, img_embeds, text_feats,
                     )
                     score = self.itm_head(output.last_hidden_state[:, 0, :])[:, 1]
                     score_matrix_i2t[i, batch_topk] = score
-        #             print(f'mem 2 {torch.cuda.memory_allocated()// (1024*1024)}')
-        # print(f'mem 3 {torch.cuda.memory_allocated()// (1024*1024)}')
         return score_matrix_i2t
 
     def compute_score_matrix_t2i(self, img_feats, img_embeds, text_feats,