Skip to content

Commit

Permalink
first Readme
Browse files Browse the repository at this point in the history
  • Loading branch information
Wangt-CN committed Feb 27, 2020
1 parent 51a882a commit 1452352
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 99 deletions.
11 changes: 9 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,9 @@
# VC-R-CNN
The official pytorch implementation of ``Visual Commonsense R-CNN''
## Visual Commonsense R-CNN (VC R-CNN)

[![LICENSE](https://img.shields.io/badge/license-MIT-green)](https://github.com/yaoyao-liu/mnemonics/blob/master/LICENSE)
[![Python](https://img.shields.io/badge/python-3.7-blue.svg)](https://www.python.org/)
![PyTorch](https://img.shields.io/badge/pytorch-1.2.0-%237732a8)

This repository contains the official PyTorch implementation of the [CVPR 2020](http://cvpr2020.thecvf.com/) paper "[Visual Commonsense R-CNN]()" (the given link points to a temporary version, which may differ slightly from our final camera-ready version) by [Tan Wang](https://wangt-cn.github.io/), Jianqiang Huang, [Hanwang Zhang](https://www.ntu.edu.sg/home/hanwangzhang/), and [Qianru Sun](https://qianrusun1015.github.io).

The code and features will be available soon.
2 changes: 1 addition & 1 deletion vc_rcnn/configs/e2e_mask_rcnn_R_101_FPN_1x.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ MODEL:
MASK_ON: False
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_val",)
TEST: ("coco_2014_train",)
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
Expand Down
3 changes: 3 additions & 0 deletions vc_rcnn/vc_rcnn/config/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,9 @@
# The pre-prepared dictionary file path for intervention (numpy format)
_C.DIC_FILE = '/data4/vc/vc-rcnn-betterlr/maskrcnn-benchmark/model/dic_coco.npy'

# The directory where the extracted object features are saved (one .npy file per image)
_C.FEATURE_SAVE_PATH = '/data4/vc/github_onlyy/vc_feature_github'

# The prior probability P(z)
_C.PRIOR_PROB = '/data4/vc/vc-rcnn-stat/stat_prob2.npy'

Expand Down
6 changes: 4 additions & 2 deletions vc_rcnn/vc_rcnn/data/datasets/coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import torch
import torchvision
import h5py
import os
import numpy as np
from vc_rcnn.structures.bounding_box import BoxList
from vc_rcnn.structures.segmentation_mask import SegmentationMask
Expand Down Expand Up @@ -94,10 +95,11 @@ def __getitem__(self, idx):
# for feature extraction during testing (bottom up bbox here)
else:
image_id_bu = str(self.id_to_img_map[idx])
boxes = np.load(self.box_dir + image_id_bu + '.npy')
boxes = np.load(os.path.join(self.box_dir, image_id_bu) + '.npy')
num_box = boxes.shape[0]
boxes = torch.tensor(boxes)

# record the number of boxes in the image so that the preprocessing can be verified later
num_box = [num_box for i in range(boxes.size(0))]
num_box = torch.tensor(num_box)
sizes = [[w, h] for i in range(boxes.size(0))]
Expand All @@ -107,7 +109,7 @@ def __getitem__(self, idx):
classes = [0 for i in range(boxes.size(0))]
classes = torch.tensor(classes)

# NOTE that the bounding box mode of bottom-up feature is different with COCO
# NOTE that the bounding box format of the bottom-up features differs from that of COCO
target = BoxList(boxes, img.size, mode="xyxy")
target.add_field("num_box", num_box)

Expand Down
100 changes: 7 additions & 93 deletions vc_rcnn/vc_rcnn/modeling/roi_heads/box_head/box_head.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from .loss import make_roi_box_loss_evaluator
from .roi_box_predictors import make_causal_predictor
import torch.nn.functional as F
import pdb
import os
class ROIBoxHead(torch.nn.Module):
"""
Generic Box Head class.
Expand All @@ -23,6 +23,7 @@ def __init__(self, cfg, in_channels):
self.post_processor = make_roi_box_post_processor(cfg)
self.loss_evaluator = make_roi_box_loss_evaluator(cfg)
self.causal_predictor = make_causal_predictor(cfg, self.feature_extractor.out_channels)
self.feature_save_path = cfg.FEATURE_SAVE_PATH

def forward(self, features, proposals, targets=None):
"""
Expand Down Expand Up @@ -53,12 +54,11 @@ def forward(self, features, proposals, targets=None):


if not self.training:
# result = self.post_processor((class_logits, box_regression), proposals)
# self.save_object_feature_coco(x, class_logits, targets)

result = self.post_processor_gt(x, class_logits, proposals)
# result = self.post_processor_gt_attn(x, class_logits, attn_list, proposals)
self.save_object_feature_gt_bu(x, result, targets)
# self.save_object_attn_gt(x, result, targets)

# save object feature
# self.save_object_feature_gt_bu(x, result, targets)

return x, result, {}

Expand All @@ -85,19 +85,6 @@ def post_processor_gt(self, x, class_logits, boxes):

return boxes

def post_processor_gt_attn(self, x, class_logits, attn_list, boxes):
# class_prob = F.softmax(class_logits, -1)
bbx_idx = torch.arange(0, class_logits.size(0))
# image_shapes = [box.size for box in boxes]
boxes_per_image = [len(box) for box in boxes]
# class_prob = class_prob.split(boxes_per_image, dim=0)
bbx_idx = bbx_idx.split(boxes_per_image, dim=0)

for i, (attn_image, bbx_idx_image) in enumerate(zip(attn_list, bbx_idx)):
boxes[i].add_field("attn", attn_image)
boxes[i].add_field("features", x[bbx_idx_image])

return boxes

def save_object_feature_gt_bu(self, x, result, targets):

Expand All @@ -106,84 +93,11 @@ def save_object_feature_gt_bu(self, x, result, targets):
try:
assert image.get_field("num_box")[0] == feature_pre_image.shape[0]
image_id = str(image.get_field("image_id")[0].cpu().numpy())
path = '/data4/vc/vc-rcnn-onlyy/vc-rcnn-onlyy/' + image_id +'.npy'
path = os.path.join(self.feature_save_path, image_id) +'.npy'
np.save(path, feature_pre_image)
except:
print(image)

def save_object_attn_gt(self, x, result, targets):
gpu_id = x.get_device()

with h5py.File('/data2/tingjia/wt/openimage/target_dir/coco/vc-feature/attn' + str(gpu_id) + '.hdf5', 'a') as f:
for i, image in enumerate(result):
# idx_pre_image = image.get_field("idx")
class_gtlabel_per_image = image.get_field("labels")
attn_pre_image = image.get_field("attn")
# class_label_per_image = image.get_field("labels_classify")

# feature_pre_image = image.get_field("features")
# if class_gtlabel_per_image.size(0) >= 1 and targets[i].get_field("image_id").size(0) > 0:
# image_id_h5py = f.create_group(img_pths[i])

image_id_h5py = f.create_group(str(targets[i].get_field("image_id")[0].cpu().numpy()))
image_id_h5py.create_dataset("attn", data=attn_pre_image.cpu())
image_id_h5py.create_dataset("class_gtlabel", data=class_gtlabel_per_image.cpu())
# image_id_h5py.create_dataset("class_label", data=class_label_per_image.cpu())

original_size = targets[i].get_field("orignal_size")[0]
image = image.resize((original_size[0], original_size[1]))

image_id_h5py.create_dataset("bbox", data=image.bbox.cpu())


def save_object_feature_gt(self, x, result, targets):
gpu_id = x.get_device()

with h5py.File('/data2/tingjia/wt/openimage/target_dir/coco/vc-feature/coco_test_all_vc_xy_10_100_' + str(gpu_id) + '.hdf5', 'a') as f:
for i, image in enumerate(result):
# idx_pre_image = image.get_field("idx")
class_gtlabel_per_image = image.get_field("labels")
# class_label_per_image = image.get_field("labels_classify")

feature_pre_image = image.get_field("features")
if class_gtlabel_per_image.size(0) >= 1 and targets[i].get_field("image_id").size(0) > 0:
# image_id_h5py = f.create_group(img_pths[i])
if str(targets[i].get_field("image_id")[0].cpu().numpy()) in f:
del f[str(targets[i].get_field("image_id")[0].cpu().numpy())]
image_id_h5py = f.create_group(str(targets[i].get_field("image_id")[0].cpu().numpy()))
image_id_h5py.create_dataset("feature", data=feature_pre_image.cpu())
image_id_h5py.create_dataset("class_gtlabel", data=class_gtlabel_per_image.cpu())
# image_id_h5py.create_dataset("class_label", data=class_label_per_image.cpu())

original_size = targets[i].get_field("orignal_size")[0]
image = image.resize((original_size[0], original_size[1]))

image_id_h5py.create_dataset("bbox", data=image.bbox.cpu())


def save_object_feature_coco(self, x, result, targets):
gpu_id = x.get_device()
if gpu_id == 0:
with h5py.File('/data2/tingjia/wt/openimage/target_dir/Openimages/coco_vctrain/coco_train_all_vc2.hdf5', 'a') as f:
for i, image in enumerate(result):
idx_pre_image = image.get_field("idx")
softscore_pre_image = image.get_field("soft_scores")
class_label_per_image = image.get_field("labels")

feature_pre_image = x[idx_pre_image]
if class_label_per_image.size(0) >= 1 and targets[i].get_field("image_id").size(0) > 0:

# image_id_h5py = f.create_group(img_pths[i])
# exist_id = str(targets[i].get_field("image_id")[0].cpu().numpy())
# if exist_id not in f.keys():
image_id_h5py = f.create_group(str(targets[i].get_field("image_id")[0].cpu().numpy()))
image_id_h5py.create_dataset("feature", data=feature_pre_image.cpu())
image_id_h5py.create_dataset("soft_label", data=softscore_pre_image.cpu())
image_id_h5py.create_dataset("class_label", data=class_label_per_image.cpu())

original_size = targets[i].get_field("orignal_size")[0]
image = image.resize((original_size[0], original_size[1]))
image_id_h5py.create_dataset("bbox", data=image.bbox.cpu())



Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ def z_dic(self, y, dic_z, prior):
Please note that we compute the intervention over the whole batch rather than for a single object as in the main paper.
"""
length = y.size(0)

if length == 1:
print('debug')
attention = torch.mm(self.Wy(y), self.Wz(dic_z).t()) / (self.embedding_size ** 0.5)
attention = F.softmax(attention, 1)
z_hat = attention.unsqueeze(2) * dic_z.unsqueeze(0)
Expand Down

0 comments on commit 1452352

Please sign in to comment.