Commit: clean
j96w committed Jan 15, 2019
1 parent 9e0e4bf commit ee847a3
Showing 5 changed files with 11 additions and 222 deletions.
README.md (6 changes: 3 additions & 3 deletions)
@@ -20,7 +20,7 @@

## Overview

This repository is the implementation code of the paper "DenseFusion: 6D Object Pose Estimation by Iterative Dense Fusion" ([arXiv](), [website](https://sites.google.com/view/densefusion), [video](https://www.youtube.com/watch?v=SsE5-FuK5jo)) by Chen et al. The model takes an RGB-D image as input and predicts the 6D pose of each object in the frame. The network is implemented with [PyTorch](https://pytorch.org/) and the rest of the framework is in Python. Since this project focuses on the 6D pose estimation process, we do not specifically limit the choice of segmentation model; you can choose your preferred semantic-segmentation or instance-segmentation method according to your needs. In this repo, we provide our full implementation code of the DenseFusion model, the Iterative Refinement model, and a vanilla SegNet semantic-segmentation model used in our real-robot grasping experiment. The ROS code of the real-robot grasping experiment is not included.
This repository is the implementation code of the paper "DenseFusion: 6D Object Pose Estimation by Iterative Dense Fusion" ([arXiv](), [Project](https://sites.google.com/view/densefusion), [Video](https://www.youtube.com/watch?v=SsE5-FuK5jo)) by Chen et al. at the [Stanford Vision and Learning Lab](http://svl.stanford.edu/). The model takes an RGB-D image as input and predicts the 6D pose of each object in the frame. The network is implemented with [PyTorch](https://pytorch.org/) and the rest of the framework is in Python. Since this project focuses on the 6D pose estimation process, we do not specifically limit the choice of segmentation model; you can choose your preferred semantic-segmentation or instance-segmentation method according to your needs. In this repo, we provide our full implementation code of the DenseFusion model, the Iterative Refinement model, and a vanilla SegNet semantic-segmentation model used in our real-robot grasping experiment. The ROS code of the real-robot grasping experiment is not included.
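Editor's note, not part of this commit: the predicted pose is a rotation plus a translation, and the repo ships the Gohlke transformation library as `lib/transformations.py` (listed below). A minimal sketch of turning a predicted rotation/translation pair into a 4x4 homogeneous transform, assuming the rotation is a unit quaternion in (w, x, y, z) order as the `pred_r`/`pred_t` naming in `lib/loss_refiner.py` suggests (the prediction API itself is not shown in this diff):

```python
# Hypothetical post-processing sketch: combine a predicted quaternion and
# translation into a 4x4 object pose. pred_r is assumed to be a unit
# quaternion in (w, x, y, z) order and pred_t a 3-vector.
import numpy as np
from lib.transformations import quaternion_matrix

def pose_matrix(pred_r, pred_t):
    mat = quaternion_matrix(pred_r)   # 4x4 homogeneous matrix, rotation block set
    mat[0:3, 3] = pred_t              # fill in the translation column
    return mat

print(pose_matrix(np.array([1.0, 0.0, 0.0, 0.0]), np.array([0.1, 0.0, 0.5])))
```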


## Requirements
@@ -56,7 +56,7 @@ This repository is the implementation code of the paper "DenseFusion: 6D Object
* **lib/loss_refiner.py**: Loss calculation for iterative refinement model.
* **lib/transformations.py**: [Transformation Function Library](https://www.lfd.uci.edu/~gohlke/code/transformations.py.html).
* **lib/network.py**: Network architecture.
* **lib/extractors.py**: Encoder network architecture adapted from [pspnet-pytorch](https://github.com/Lextal/pspnet-pytorch)
* **lib/extractors.py**: Encoder network architecture adapted from [pspnet-pytorch](https://github.com/Lextal/pspnet-pytorch).
* **lib/pspnet.py**: Decoder network architecture.
* **lib/utils.py**: Logger code.
* **lib/knn/**: CUDA K-nearest neighbours library adapted from [pytorch_knn_cuda](https://github.com/chrischoy/pytorch_knn_cuda).
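Editor's note, not part of this commit: as a rough sketch of how these modules fit together, `lib/pspnet.py` wraps an encoder chosen from `lib/extractors.py`, and `lib/network.py` builds it through the `psp_models` table shown further down in this diff. Only the constructor call below is taken from the diff; everything else is an assumption.

```python
# Sketch: construct the PSPNet-based color encoder the same way the
# psp_models table in the lib/network.py hunk below does. Downstream use
# (per-pixel embedding selection, fusion with point features) is not shown.
from lib.pspnet import PSPNet

cnn = PSPNet(sizes=(1, 2, 3, 6), psp_size=512,
             deep_features_size=256, backend='resnet18')
print(sum(p.numel() for p in cnn.parameters()), 'parameters')
```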
@@ -134,7 +134,7 @@ Please run:
```
./experiments/scripts/eval_ycb.sh
```
This script will first download the `YCB_Video_toolbox` to the root folder of this repo and then test the selected DenseFusion and Iterative Refinement models on the 2949 keyframes of the 10 testing videos in the YCB_Video dataset, using the same segmentation results as PoseCNN. The result without refinement is stored in `eval_result/ycb/Densefusion_wo_refine_result` and the refined result is in `eval_result/ycb/Densefusion_iterative_result`.
This script will first download the `YCB_Video_toolbox` to the root folder of this repo and then test the selected DenseFusion and Iterative Refinement models on the 2949 keyframes of the 10 testing videos in the YCB_Video dataset, using the same segmentation results as PoseCNN. The result without refinement is stored in `experiments/eval_result/ycb/Densefusion_wo_refine_result` and the refined result is in `experiments/eval_result/ycb/Densefusion_iterative_result`.

After that, you can add the paths of `experiments/eval_result/ycb/Densefusion_wo_refine_result/` and `experiments/eval_result/ycb/Densefusion_iterative_result/` to `YCB_Video_toolbox/evaluate_poses_keyframe.m` and run it with [MATLAB](https://www.mathworks.com/products/matlab.html). The script `YCB_Video_toolbox/plot_accuracy_keyframe.m` can then show you the comparison plots. The easiest way is to copy the adapted scripts from the `replace_ycb_toolbox/` folder over the corresponding files in the `YCB_Video_toolbox/` folder. You may still need to set the path of your `YCB_Video Dataset/` in `globals.m` and copy the two result folders (`Densefusion_wo_refine_result/` and `Densefusion_iterative_result/`) into the `YCB_Video_toolbox/` folder.
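Editor's note, not part of this commit: before switching to MATLAB it can help to check that the result folders were actually written. This sketch assumes the evaluation script saves MATLAB-readable `.mat` files into those folders (the toolbox consumes them in MATLAB); the exact file naming is not documented in this diff.

```python
# Hypothetical sanity check: count the saved result files and print the
# variable names stored in one of them.
import glob
import scipy.io as scio

files = sorted(glob.glob('experiments/eval_result/ycb/Densefusion_iterative_result/*.mat'))
print(len(files), 'result files found')
if files:
    sample = scio.loadmat(files[0])
    print([k for k in sample if not k.startswith('__')])
```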

lib/extractors.py (213 changes: 6 additions & 207 deletions)
@@ -4,32 +4,17 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils import model_zoo
from torchvision.models.densenet import densenet121, densenet161
from torchvision.models.squeezenet import squeezenet1_1


def load_weights_sequential(target, source_state):
    new_dict = OrderedDict()
    for (k1, v1), (k2, v2) in zip(target.state_dict().items(), source_state.items()):
        new_dict[k1] = v2
    target.load_state_dict(new_dict)


model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}


def conv3x3(in_planes, out_planes, stride=1, dilation=1):
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=dilation, dilation=dilation, bias=False)


class BasicBlock(nn.Module):
    expansion = 1

@@ -60,7 +45,6 @@ def forward(self, x):

class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, dilation=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
@@ -140,207 +124,22 @@ def forward(self, x):
        return x, x_3



class _DenseLayer(nn.Sequential):
    def __init__(self, num_input_features, growth_rate, bn_size, drop_rate):
        super(_DenseLayer, self).__init__()
        self.add_module('norm.1', nn.BatchNorm2d(num_input_features)),
        self.add_module('relu.1', nn.ReLU(inplace=True)),
        self.add_module('conv.1', nn.Conv2d(num_input_features, bn_size *
                        growth_rate, kernel_size=1, stride=1, bias=False)),
        self.add_module('norm.2', nn.BatchNorm2d(bn_size * growth_rate)),
        self.add_module('relu.2', nn.ReLU(inplace=True)),
        self.add_module('conv.2', nn.Conv2d(bn_size * growth_rate, growth_rate,
                        kernel_size=3, stride=1, padding=1, bias=False)),
        self.drop_rate = drop_rate

    def forward(self, x):
        new_features = super(_DenseLayer, self).forward(x)
        if self.drop_rate > 0:
            new_features = F.dropout(new_features, p=self.drop_rate, training=self.training)
        return torch.cat([x, new_features], 1)


class _DenseBlock(nn.Sequential):
    def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate):
        super(_DenseBlock, self).__init__()
        for i in range(num_layers):
            layer = _DenseLayer(num_input_features + i * growth_rate, growth_rate, bn_size, drop_rate)
            self.add_module('denselayer%d' % (i + 1), layer)


class _Transition(nn.Sequential):
    def __init__(self, num_input_features, num_output_features, downsample=True):
        super(_Transition, self).__init__()
        self.add_module('norm', nn.BatchNorm2d(num_input_features))
        self.add_module('relu', nn.ReLU(inplace=True))
        self.add_module('conv', nn.Conv2d(num_input_features, num_output_features,
                        kernel_size=1, stride=1, bias=False))
        if downsample:
            self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2))
        else:
            self.add_module('pool', nn.AvgPool2d(kernel_size=1, stride=1))  # compatibility hack


class DenseNet(nn.Module):
    def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16),
                 num_init_features=64, bn_size=4, drop_rate=0, pretrained=True):

        super(DenseNet, self).__init__()

        # First convolution
        self.start_features = nn.Sequential(OrderedDict([
            ('conv0', nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)),
            ('norm0', nn.BatchNorm2d(num_init_features)),
            ('relu0', nn.ReLU(inplace=True)),
            ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
        ]))

        # Each denseblock
        num_features = num_init_features

        init_weights = list(densenet121(pretrained=True).features.children())
        start = 0
        for i, c in enumerate(self.start_features.children()):
            if pretrained:
                c.load_state_dict(init_weights[i].state_dict())
            start += 1
        self.blocks = nn.ModuleList()
        for i, num_layers in enumerate(block_config):
            block = _DenseBlock(num_layers=num_layers, num_input_features=num_features,
                                bn_size=bn_size, growth_rate=growth_rate, drop_rate=drop_rate)
            if pretrained:
                block.load_state_dict(init_weights[start].state_dict())
            start += 1
            self.blocks.append(block)
            setattr(self, 'denseblock%d' % (i + 1), block)

            num_features = num_features + num_layers * growth_rate
            if i != len(block_config) - 1:
                downsample = i < 1
                trans = _Transition(num_input_features=num_features, num_output_features=num_features // 2,
                                    downsample=downsample)
                if pretrained:
                    trans.load_state_dict(init_weights[start].state_dict())
                start += 1
                self.blocks.append(trans)
                setattr(self, 'transition%d' % (i + 1), trans)
                num_features = num_features // 2

    def forward(self, x):
        out = self.start_features(x)
        deep_features = None
        for i, block in enumerate(self.blocks):
            out = block(out)
            if i == 5:
                deep_features = out

        return out, deep_features


class Fire(nn.Module):

    def __init__(self, inplanes, squeeze_planes,
                 expand1x1_planes, expand3x3_planes, dilation=1):
        super(Fire, self).__init__()
        self.inplanes = inplanes
        self.squeeze = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1)
        self.squeeze_activation = nn.ReLU(inplace=True)
        self.expand1x1 = nn.Conv2d(squeeze_planes, expand1x1_planes,
                                   kernel_size=1)
        self.expand1x1_activation = nn.ReLU(inplace=True)
        self.expand3x3 = nn.Conv2d(squeeze_planes, expand3x3_planes,
                                   kernel_size=3, padding=dilation, dilation=dilation)
        self.expand3x3_activation = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.squeeze_activation(self.squeeze(x))
        return torch.cat([
            self.expand1x1_activation(self.expand1x1(x)),
            self.expand3x3_activation(self.expand3x3(x))
        ], 1)


class SqueezeNet(nn.Module):

    def __init__(self, pretrained=False):
        super(SqueezeNet, self).__init__()

        self.feat_1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True)
        )
        self.feat_2 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            Fire(64, 16, 64, 64),
            Fire(128, 16, 64, 64)
        )
        self.feat_3 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            Fire(128, 32, 128, 128, 2),
            Fire(256, 32, 128, 128, 2)
        )
        self.feat_4 = nn.Sequential(
            Fire(256, 48, 192, 192, 4),
            Fire(384, 48, 192, 192, 4),
            Fire(384, 64, 256, 256, 4),
            Fire(512, 64, 256, 256, 4)
        )
        if pretrained:
            weights = squeezenet1_1(pretrained=True).features.state_dict()
            load_weights_sequential(self, weights)

    def forward(self, x):
        f1 = self.feat_1(x)
        f2 = self.feat_2(f1)
        f3 = self.feat_3(f2)
        f4 = self.feat_4(f3)
        return f4, f3


'''
Handy methods for construction
'''


def squeezenet(pretrained=True):
    return SqueezeNet(pretrained)


def densenet(pretrained=True):
    return DenseNet(pretrained=pretrained)


def resnet18(pretrained=True):
def resnet18(pretrained=False):
    model = ResNet(BasicBlock, [2, 2, 2, 2])
    if pretrained:
        load_weights_sequential(model, model_zoo.load_url(model_urls['resnet18']))
    return model


def resnet34(pretrained=True):
def resnet34(pretrained=False):
    model = ResNet(BasicBlock, [3, 4, 6, 3])
    if pretrained:
        load_weights_sequential(model, model_zoo.load_url(model_urls['resnet34']))
    return model


def resnet50(pretrained=True):
def resnet50(pretrained=False):
    model = ResNet(Bottleneck, [3, 4, 6, 3])
    if pretrained:
        load_weights_sequential(model, model_zoo.load_url(model_urls['resnet50']))
    return model


def resnet101(pretrained=True):
def resnet101(pretrained=False):
    model = ResNet(Bottleneck, [3, 4, 23, 3])
    if pretrained:
        load_weights_sequential(model, model_zoo.load_url(model_urls['resnet101']))
    return model


def resnet152(pretrained=True):
def resnet152(pretrained=False):
    model = ResNet(Bottleneck, [3, 8, 36, 3])
    if pretrained:
        load_weights_sequential(model, model_zoo.load_url(model_urls['resnet152']))
    return model
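Editor's note: after this commit the ResNet constructors default to `pretrained=False`, so the backbones start from random weights and the removed `model_zoo` path is no longer taken. `lib/pspnet.py` still selects an extractor by name with `getattr(extractors, backend)(pretrained)` (see its hunk below), and each extractor returns a pair of feature maps, as the kept `return x, x_3` above shows. A rough sketch, assuming the module is importable as `lib.extractors` and that the placeholder input size used here is acceptable:

```python
# Sketch: how lib/pspnet.py obtains an extractor after this commit. The
# backbone is randomly initialized, and its forward pass returns both the
# final and an intermediate feature map.
import torch
from lib import extractors

backend = 'resnet18'
feats = getattr(extractors, backend)(False)     # same call shape as in PSPNet.__init__
x, x_3 = feats(torch.randn(1, 3, 120, 120))     # dummy RGB crop; size is a placeholder
print(x.shape, x_3.shape)
```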
lib/loss_refiner.py (1 change: 0 additions & 1 deletion)
Expand Up @@ -59,7 +59,6 @@ def loss_calculation(pred_r, pred_t, target, model_points, idx, points, num_poin
    ori_t = t.repeat(num_point_mesh, 1).contiguous().view(1, num_point_mesh, 3)
    new_target = torch.bmm((new_target - ori_t), ori_base).contiguous()


    # print('------------> ', dis.item(), idx[0].item())

    return dis, new_points.detach(), new_target.detach()
lib/network.py (11 changes: 1 addition & 10 deletions)
Expand Up @@ -17,8 +17,6 @@
from lib.pspnet import PSPNet

psp_models = {
    'squeezenet': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=512, deep_features_size=256, backend='squeezenet'),
    'densenet': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=1024, deep_features_size=512, backend='densenet'),
    'resnet18': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=512, deep_features_size=256, backend='resnet18'),
    'resnet34': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=512, deep_features_size=256, backend='resnet34'),
    'resnet50': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=2048, deep_features_size=1024, backend='resnet50'),
@@ -126,10 +124,7 @@ def forward(self, img, x, choose, obj):
        out_rx = torch.index_select(rx[b], 0, obj[b])
        out_tx = torch.index_select(tx[b], 0, obj[b])
        out_cx = torch.index_select(cx[b], 0, obj[b])
        # for b in range(1, bs):
        #     out_rx = torch.cat((out_rx, torch.index_select(rx[b], 0, obj[b])), dim=0)
        #     out_tx = torch.cat((out_tx, torch.index_select(tx[b], 0, obj[b])), dim=0)
        #     out_cx = torch.cat((out_cx, torch.index_select(cx[b], 0, obj[b])), dim=0)

        out_rx = out_rx.contiguous().transpose(2, 1).contiguous()
        out_cx = out_cx.contiguous().transpose(2, 1).contiguous()
        out_tx = out_tx.contiguous().transpose(2, 1).contiguous()
@@ -208,8 +203,4 @@ def forward(self, x, emb, obj):
        out_rx = torch.index_select(rx[b], 0, obj[b])
        out_tx = torch.index_select(tx[b], 0, obj[b])

        # for b in range(1, bs):
        #     out_rx = torch.cat((out_rx, torch.index_select(rx[b], 0, obj[b])), dim=0)
        #     out_tx = torch.cat((out_tx, torch.index_select(tx[b], 0, obj[b])), dim=0)

        return out_rx, out_tx
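Editor's note: both hunks above keep the same `torch.index_select` pattern, in which only the prediction rows belonging to the sample's object id are retained from the per-class output tensor. A small standalone illustration with placeholder shapes (not the real network dimensions):

```python
# Illustration of the index_select pattern: pick the rows of a per-class
# prediction tensor that correspond to one object id.
import torch

rx_b = torch.randn(21, 4, 500)       # per-class predictions for one sample (placeholder shape)
obj_b = torch.tensor([13])           # class index of the object in this sample
out_rx = torch.index_select(rx_b, 0, obj_b)
print(out_rx.shape)                  # torch.Size([1, 4, 500])
```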
lib/pspnet.py (2 changes: 1 addition & 1 deletion)
@@ -38,7 +38,7 @@ def forward(self, x):


class PSPNet(nn.Module):
    def __init__(self, n_classes=21, sizes=(1, 2, 3, 6), psp_size=2048, deep_features_size=1024, backend='resnet34',
    def __init__(self, n_classes=21, sizes=(1, 2, 3, 6), psp_size=2048, deep_features_size=1024, backend='resnet18',
                 pretrained=False):
        super(PSPNet, self).__init__()
        self.feats = getattr(extractors, backend)(pretrained)
