first Readme

Wangt-CN · Feb 27, 2020 · 1452352 · 1452352
1 parent 51a882a
commit 1452352
Show file tree

Hide file tree

Showing 6 changed files with 26 additions and 99 deletions.
diff --git a/README.md b/README.md
@@ -1,2 +1,9 @@
-# VC-R-CNN
-The official pytorch implementation of ``Visual Commonsense R-CNN''
+## Visual Commonsense R-CNN (VC R-CNN)
+
+[![LICENSE](https://img.shields.io/badge/license-MIT-green)](https://github.com/yaoyao-liu/mnemonics/blob/master/LICENSE)
+[![Python](https://img.shields.io/badge/python-3.7-blue.svg)](https://www.python.org/)
+![PyTorch](https://img.shields.io/badge/pytorch-1.2.0-%237732a8)
+
+This repository contains the official PyTorch implementation of the [CVPR 2020](http://cvpr2020.thecvf.com/) paper "[Visual Commonsense R-CNN]()" (the linked paper is a temporary version and may differ slightly from our final camera-ready version) by [Tan Wang](https://wangt-cn.github.io/), Jianqiang Huang, [Hanwang Zhang](https://www.ntu.edu.sg/home/hanwangzhang/), and [Qianru Sun](https://qianrusun1015.github.io).
+
+The code and features will be available soon.
diff --git a/vc_rcnn/configs/e2e_mask_rcnn_R_101_FPN_1x.yaml b/vc_rcnn/configs/e2e_mask_rcnn_R_101_FPN_1x.yaml
@@ -31,7 +31,7 @@ MODEL:
   MASK_ON: False
 DATASETS:
   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
-  TEST: ("coco_2014_val",)
+  TEST: ("coco_2014_train",)
 DATALOADER:
   SIZE_DIVISIBILITY: 32
 SOLVER:

diff --git a/vc_rcnn/vc_rcnn/config/defaults.py b/vc_rcnn/vc_rcnn/config/defaults.py
@@ -478,6 +478,9 @@
 # The pre-prepared dictionary file path for intervention (numpy format)
 _C.DIC_FILE = '/data4/vc/vc-rcnn-betterlr/maskrcnn-benchmark/model/dic_coco.npy'
 
+# The directory where the extracted object features are saved (one .npy file per image)
+_C.FEATURE_SAVE_PATH = '/data4/vc/github_onlyy/vc_feature_github'
+
 # The prior probability P(z)
 _C.PRIOR_PROB = '/data4/vc/vc-rcnn-stat/stat_prob2.npy'
 

diff --git a/vc_rcnn/vc_rcnn/data/datasets/coco.py b/vc_rcnn/vc_rcnn/data/datasets/coco.py
@@ -2,6 +2,7 @@
 import torch
 import torchvision
 import h5py
+import os
 import numpy as np
 from vc_rcnn.structures.bounding_box import BoxList
 from vc_rcnn.structures.segmentation_mask import SegmentationMask
@@ -94,10 +95,11 @@ def __getitem__(self, idx):
         # for feature extraction during testing (bottom up bbox here)
         else:
             image_id_bu = str(self.id_to_img_map[idx])
-            boxes = np.load(self.box_dir + image_id_bu + '.npy')
+            boxes = np.load(os.path.join(self.box_dir, image_id_bu) + '.npy')
             num_box = boxes.shape[0]
             boxes = torch.tensor(boxes)
 
+            # record the number of boxes in the image so the preprocessing can be checked later
             num_box = [num_box for i in range(boxes.size(0))]
             num_box = torch.tensor(num_box)
             sizes = [[w, h] for i in range(boxes.size(0))]
@@ -107,7 +109,7 @@ def __getitem__(self, idx):
             classes = [0 for i in range(boxes.size(0))]
             classes = torch.tensor(classes)
 
-            # NOTE that the bounding box mode of bottom-up feature is different with COCO
+            # NOTE that the bounding box format of the bottom-up features differs from that of COCO
             target = BoxList(boxes, img.size, mode="xyxy")
             target.add_field("num_box", num_box)
 

diff --git a/vc_rcnn/vc_rcnn/modeling/roi_heads/box_head/box_head.py b/vc_rcnn/vc_rcnn/modeling/roi_heads/box_head/box_head.py
@@ -9,7 +9,7 @@
 from .loss import make_roi_box_loss_evaluator
 from .roi_box_predictors import make_causal_predictor
 import torch.nn.functional as F
-import pdb
+import os
 class ROIBoxHead(torch.nn.Module):
     """
     Generic Box Head class.
@@ -23,6 +23,7 @@ def __init__(self, cfg, in_channels):
         self.post_processor = make_roi_box_post_processor(cfg)
         self.loss_evaluator = make_roi_box_loss_evaluator(cfg)
         self.causal_predictor = make_causal_predictor(cfg, self.feature_extractor.out_channels)
+        self.feature_save_path = cfg.FEATURE_SAVE_PATH
 
     def forward(self, features, proposals, targets=None):
         """
@@ -53,12 +54,11 @@ def forward(self, features, proposals, targets=None):
 
 
         if not self.training:
-            # result = self.post_processor((class_logits, box_regression), proposals)
-            # self.save_object_feature_coco(x, class_logits, targets)
+
             result = self.post_processor_gt(x, class_logits, proposals)
-            # result = self.post_processor_gt_attn(x, class_logits, attn_list, proposals)
-            self.save_object_feature_gt_bu(x, result, targets)
-            # self.save_object_attn_gt(x, result, targets)
+
+            # save object feature
+            # self.save_object_feature_gt_bu(x, result, targets)
 
             return x, result, {}
 
@@ -85,19 +85,6 @@ def post_processor_gt(self, x, class_logits, boxes):
 
         return boxes
 
-    def post_processor_gt_attn(self, x, class_logits, attn_list, boxes):
-        # class_prob = F.softmax(class_logits, -1)
-        bbx_idx = torch.arange(0, class_logits.size(0))
-        # image_shapes = [box.size for box in boxes]
-        boxes_per_image = [len(box) for box in boxes]
-        # class_prob = class_prob.split(boxes_per_image, dim=0)
-        bbx_idx = bbx_idx.split(boxes_per_image, dim=0)
-
-        for i, (attn_image, bbx_idx_image) in enumerate(zip(attn_list, bbx_idx)):
-            boxes[i].add_field("attn", attn_image)
-            boxes[i].add_field("features", x[bbx_idx_image])
-
-        return boxes
 
     def save_object_feature_gt_bu(self, x, result, targets):
 
@@ -106,84 +93,11 @@ def save_object_feature_gt_bu(self, x, result, targets):
             try:
                 assert image.get_field("num_box")[0] == feature_pre_image.shape[0]
                 image_id = str(image.get_field("image_id")[0].cpu().numpy())
-                path = '/data4/vc/vc-rcnn-onlyy/vc-rcnn-onlyy/' + image_id +'.npy'
+                path = os.path.join(self.feature_save_path, image_id) +'.npy'
                 np.save(path, feature_pre_image)
             except:
                 print(image)
 
-    def save_object_attn_gt(self, x, result, targets):
-        gpu_id = x.get_device()
-
-        with h5py.File('/data2/tingjia/wt/openimage/target_dir/coco/vc-feature/attn' + str(gpu_id) + '.hdf5', 'a') as f:
-            for i, image in enumerate(result):
-                # idx_pre_image = image.get_field("idx")
-                class_gtlabel_per_image = image.get_field("labels")
-                attn_pre_image = image.get_field("attn")
-                # class_label_per_image = image.get_field("labels_classify")
-
-                # feature_pre_image = image.get_field("features")
-                # if class_gtlabel_per_image.size(0) >= 1 and targets[i].get_field("image_id").size(0) > 0:
-                    # image_id_h5py = f.create_group(img_pths[i])
-
-                image_id_h5py = f.create_group(str(targets[i].get_field("image_id")[0].cpu().numpy()))
-                image_id_h5py.create_dataset("attn", data=attn_pre_image.cpu())
-                image_id_h5py.create_dataset("class_gtlabel", data=class_gtlabel_per_image.cpu())
-                # image_id_h5py.create_dataset("class_label", data=class_label_per_image.cpu())
-
-                original_size = targets[i].get_field("orignal_size")[0]
-                image = image.resize((original_size[0], original_size[1]))
-
-                image_id_h5py.create_dataset("bbox", data=image.bbox.cpu())
-
-
-    def save_object_feature_gt(self, x, result, targets):
-        gpu_id = x.get_device()
-
-        with h5py.File('/data2/tingjia/wt/openimage/target_dir/coco/vc-feature/coco_test_all_vc_xy_10_100_' + str(gpu_id) + '.hdf5', 'a') as f:
-            for i, image in enumerate(result):
-                # idx_pre_image = image.get_field("idx")
-                class_gtlabel_per_image = image.get_field("labels")
-                # class_label_per_image = image.get_field("labels_classify")
-
-                feature_pre_image = image.get_field("features")
-                if class_gtlabel_per_image.size(0) >= 1 and targets[i].get_field("image_id").size(0) > 0:
-                    # image_id_h5py = f.create_group(img_pths[i])
-                    if str(targets[i].get_field("image_id")[0].cpu().numpy()) in f:
-                        del f[str(targets[i].get_field("image_id")[0].cpu().numpy())]
-                    image_id_h5py = f.create_group(str(targets[i].get_field("image_id")[0].cpu().numpy()))
-                    image_id_h5py.create_dataset("feature", data=feature_pre_image.cpu())
-                    image_id_h5py.create_dataset("class_gtlabel", data=class_gtlabel_per_image.cpu())
-                    # image_id_h5py.create_dataset("class_label", data=class_label_per_image.cpu())
-
-                    original_size = targets[i].get_field("orignal_size")[0]
-                    image = image.resize((original_size[0], original_size[1]))
-
-                    image_id_h5py.create_dataset("bbox", data=image.bbox.cpu())
-
-
-    def save_object_feature_coco(self, x, result, targets):
-        gpu_id = x.get_device()
-        if gpu_id == 0:
-            with h5py.File('/data2/tingjia/wt/openimage/target_dir/Openimages/coco_vctrain/coco_train_all_vc2.hdf5', 'a') as f:
-                for i, image in enumerate(result):
-                    idx_pre_image = image.get_field("idx")
-                    softscore_pre_image = image.get_field("soft_scores")
-                    class_label_per_image = image.get_field("labels")
-
-                    feature_pre_image = x[idx_pre_image]
-                    if class_label_per_image.size(0) >= 1 and targets[i].get_field("image_id").size(0) > 0:
-
-                        # image_id_h5py = f.create_group(img_pths[i])
-                        # exist_id = str(targets[i].get_field("image_id")[0].cpu().numpy())
-                        # if exist_id not in f.keys():
-                        image_id_h5py = f.create_group(str(targets[i].get_field("image_id")[0].cpu().numpy()))
-                        image_id_h5py.create_dataset("feature", data=feature_pre_image.cpu())
-                        image_id_h5py.create_dataset("soft_label", data=softscore_pre_image.cpu())
-                        image_id_h5py.create_dataset("class_label", data=class_label_per_image.cpu())
-
-                        original_size = targets[i].get_field("orignal_size")[0]
-                        image = image.resize((original_size[0], original_size[1]))
-                        image_id_h5py.create_dataset("bbox", data=image.bbox.cpu())
 
 
 

diff --git a/vc_rcnn/vc_rcnn/modeling/roi_heads/box_head/roi_box_predictors.py b/vc_rcnn/vc_rcnn/modeling/roi_heads/box_head/roi_box_predictors.py
@@ -106,7 +106,8 @@ def z_dic(self, y, dic_z, prior):
         Please note that we compute the intervention over the whole batch rather than for a single object as in the main paper.
         """
         length = y.size(0)
-
+        if length == 1:
+            print('debug')
         attention = torch.mm(self.Wy(y), self.Wz(dic_z).t()) / (self.embedding_size ** 0.5)
         attention = F.softmax(attention, 1)
         z_hat = attention.unsqueeze(2) * dic_z.unsqueeze(0)