# model_fpn.py (forked from haofengac/MonoDepth-FPN-PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models.resnet import resnet101


def agg_node(in_planes, out_planes):
    return nn.Sequential(
        nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
    )


def smooth(in_planes, out_planes):
    return nn.Sequential(
        nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
    )


def predict(in_planes, out_planes):
    return nn.Sequential(
        nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=1, padding=1),
        nn.Sigmoid(),  # constrains the predicted depth map to (0, 1)
    )


def upshuffle(in_planes, out_planes, upscale_factor):
    return nn.Sequential(
        nn.Conv2d(in_planes, out_planes * upscale_factor ** 2, kernel_size=3, stride=1, padding=1),
        nn.PixelShuffle(upscale_factor),
        nn.ReLU(),
    )
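
# Shape sketch (comment added for illustration; the tensors below are
# hypothetical): upshuffle first expands channels by upscale_factor**2, then
# PixelShuffle trades those channels for spatial resolution. With factor 2:
#   x = torch.randn(1, 128, 16, 16)
#   up = upshuffle(128, 128, 2)   # Conv2d(128 -> 512) + PixelShuffle(2)
#   up(x).shape                   # torch.Size([1, 128, 32, 32])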


class I2D(nn.Module):
    def __init__(self, pretrained=True, fixed_feature_weights=False):
        super(I2D, self).__init__()

        # Note: torchvision >= 0.13 prefers the `weights=` argument over the
        # (deprecated) `pretrained=` flag used here.
        resnet = resnet101(pretrained=pretrained)

        # Freeze those weights
        if fixed_feature_weights:
            for p in resnet.parameters():
                p.requires_grad = False

        self.layer0 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool)
        self.layer1 = nn.Sequential(resnet.layer1)
        self.layer2 = nn.Sequential(resnet.layer2)
        self.layer3 = nn.Sequential(resnet.layer3)
        self.layer4 = nn.Sequential(resnet.layer4)

        # Top layer
        self.toplayer = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0)  # Reduce channels

        # Lateral layers
        self.latlayer1 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0)
        self.latlayer2 = nn.Conv2d(512, 256, kernel_size=1, stride=1, padding=0)
        self.latlayer3 = nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0)

        # Smooth layers
        self.smooth1 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.smooth2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.smooth3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)

        # Aggregate layers
        self.agg1 = agg_node(256, 128)
        self.agg2 = agg_node(256, 128)
        self.agg3 = agg_node(256, 128)
        self.agg4 = agg_node(256, 128)

        # Upshuffle layers
        self.up1 = upshuffle(128, 128, 8)
        self.up2 = upshuffle(128, 128, 4)
        self.up3 = upshuffle(128, 128, 2)

        # Depth prediction
        self.predict1 = smooth(512, 128)
        self.predict2 = predict(128, 1)
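
    # Scale summary (comment added for clarity; assumes input H and W are
    # divisible by 32): p5 sits at 1/32 of the input resolution, p4 at 1/16,
    # p3 at 1/8, and p2 at 1/4. Each agg head maps 256 -> 128 channels, and
    # up1/up2/up3 bring d5/d4/d3 back to d2's 1/4 scale, so the concatenated
    # volume has 4 * 128 = 512 channels, matching predict1's input width.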

    def _upsample_add(self, x, y):
        '''Upsample x and add it to y.

        Args:
          x: (tensor) top feature map to be upsampled.
          y: (tensor) lateral feature map.

        Returns:
          (tensor) added feature map.

        Note that in PyTorch, when the input size is odd, the feature map
        upsampled with `F.interpolate(..., scale_factor=2, mode='nearest')`
        may not match the lateral feature map size, e.g.
        original input size: [N,_,15,15] ->
        conv2d feature map size: [N,_,8,8] ->
        upsampled feature map size: [N,_,16,16]
        So we use bilinear upsampling, which supports arbitrary output sizes.
        '''
        _, _, H, W = y.size()
        # F.interpolate replaces the deprecated F.upsample; align_corners=False
        # is the current default for bilinear interpolation.
        return F.interpolate(x, size=(H, W), mode='bilinear', align_corners=False) + y
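
    # For example (hypothetical shapes): upsampling a [1, 256, 8, 8] top map
    # onto a [1, 256, 15, 15] lateral map yields a [1, 256, 15, 15] sum:
    #   p = self._upsample_add(torch.randn(1, 256, 8, 8),
    #                          torch.randn(1, 256, 15, 15))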

    def forward(self, x):
        _, _, H, W = x.size()

        # Bottom-up
        c1 = self.layer0(x)
        c2 = self.layer1(c1)
        c3 = self.layer2(c2)
        c4 = self.layer3(c3)
        c5 = self.layer4(c4)

        # Top-down
        p5 = self.toplayer(c5)
        p4 = self._upsample_add(p5, self.latlayer1(c4))
        p4 = self.smooth1(p4)
        p3 = self._upsample_add(p4, self.latlayer2(c3))
        p3 = self.smooth2(p3)
        p2 = self._upsample_add(p3, self.latlayer3(c2))
        p2 = self.smooth3(p2)

        # Top-down predict and refine
        d5, d4, d3, d2 = self.up1(self.agg1(p5)), self.up2(self.agg2(p4)), self.up3(self.agg3(p3)), self.agg4(p2)
        _, _, H, W = d2.size()
        vol = torch.cat([F.interpolate(d, size=(H, W), mode='bilinear', align_corners=False) for d in [d5, d4, d3, d2]], dim=1)

        # return self.predict2( self.up4(self.predict1(vol)) )
        return self.predict2(self.predict1(vol))  # img : depth = 4 : 1
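

if __name__ == '__main__':
    # Minimal smoke test (added for illustration; not part of the original
    # file). Input H and W should be divisible by 32 so the ResNet strides
    # divide evenly; the predicted depth map comes out at 1/4 the input size.
    model = I2D(pretrained=False).eval()
    with torch.no_grad():
        depth = model(torch.randn(2, 3, 256, 320))
    print(depth.shape)  # expected: torch.Size([2, 1, 64, 80])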