forked from lidongyv/Reppoint-Tracking
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathy_net_model.py
166 lines (135 loc) · 4.76 KB
/
y_net_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import torch
import torch.nn as nn
import torch.nn.functional as F
class double_conv(nn.Module):
    """Two consecutive (3x3 conv => GroupNorm => LeakyReLU) stages.

    The first stage maps ``in_ch`` to ``out_ch`` channels and the second
    keeps ``out_ch``. Every convolution is bias-free with ``padding=1``,
    so height and width are left unchanged. GroupNorm always uses 32
    groups, hence ``out_ch`` has to be a multiple of 32.
    """

    def __init__(self, in_ch, out_ch):
        super().__init__()
        stages = []
        # Both stages share the same layout; only the number of channels
        # entering the convolution differs.
        for stage_in in (in_ch, out_ch):
            stages.extend([
                nn.Conv2d(stage_in, out_ch, kernel_size=3, padding=1, bias=False),
                nn.GroupNorm(num_groups=32, num_channels=out_ch),
                nn.LeakyReLU(inplace=True),
            ])
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through both stages and return the result."""
        return self.conv(x)
class inconv(nn.Module):
    """Input stem: a single ``double_conv`` mapping ``in_ch`` to ``out_ch``."""

    def __init__(self, in_ch, out_ch):
        super().__init__()
        self.conv = double_conv(in_ch=in_ch, out_ch=out_ch)

    def forward(self, x):
        """Return the stem features computed from ``x``."""
        return self.conv(x)
class down(nn.Module):
    """Encoder step: strided-conv downsampling followed by ``double_conv``.

    A bias-free 3x3 convolution with ``stride=2`` and ``padding=1`` reduces
    the spatial size (keeping ``in_ch`` channels), is normalised with a
    32-group GroupNorm and LeakyReLU, and the result is passed to
    ``double_conv(in_ch, out_ch)``.
    """

    def __init__(self, in_ch, out_ch):
        super().__init__()
        strided = nn.Conv2d(
            in_ch, in_ch, kernel_size=3, stride=2, padding=1, bias=False
        )
        norm = nn.GroupNorm(num_groups=32, num_channels=in_ch)
        act = nn.LeakyReLU(inplace=True)
        refine = double_conv(in_ch, out_ch)
        # Order matters: submodule indices become the state_dict key names.
        self.mpconv = nn.Sequential(strided, norm, act, refine)

    def forward(self, x):
        """Downsample ``x`` and return the refined features."""
        return self.mpconv(x)
class up(nn.Module):
    """Decoder step: upsample, align to the skip tensor, concatenate, fuse.

    Args:
        in_ch: Channel count of the concatenated (skip + upsampled) tensor
            that enters the fusing ``double_conv``.
        out_ch: Channel count produced by the fusing ``double_conv``.
        bilinear: When true, upsample with fixed bilinear interpolation.
            Otherwise (the default) use a learned path made of a
            ``ConvTranspose2d`` on ``in_ch // 2`` channels, GroupNorm,
            LeakyReLU and a ``double_conv``.
    """

    def __init__(self, in_ch, out_ch, bilinear=False):
        super().__init__()
        half_ch = in_ch // 2
        if not bilinear:
            # Learned 2x upsampling; operates on half of the fused channels.
            self.up = nn.Sequential(
                nn.ConvTranspose2d(
                    half_ch, half_ch, kernel_size=4, stride=2, padding=1, bias=False
                ),
                nn.GroupNorm(num_groups=32, num_channels=half_ch),
                nn.LeakyReLU(inplace=True),
                double_conv(half_ch, half_ch),
            )
        else:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        self.conv = double_conv(in_ch, out_ch)

    def forward(self, x1, x2):
        """Upsample ``x1``, pad it to ``x2``'s spatial size and fuse both.

        Dimensions 2 and 3 are treated as height and width
        (assumes NCHW tensors -- confirm against callers).
        """
        upsampled = self.up(x1)
        gap_h = x2.size(2) - upsampled.size(2)
        gap_w = x2.size(3) - upsampled.size(3)
        left, top = gap_w // 2, gap_h // 2
        # F.pad takes the last dimension first: (left, right, top, bottom).
        upsampled = F.pad(upsampled, [left, gap_w - left, top, gap_h - top])
        # Skip connection: encoder features first, decoder features second.
        return self.conv(torch.cat([x2, upsampled], dim=1))
def _gn_groups(num_channels, max_groups=32):
    """Return the largest group count <= ``max_groups`` dividing ``num_channels``.

    ``nn.GroupNorm`` requires the channel count to be divisible by the
    number of groups. For channel counts that are multiples of
    ``max_groups`` this returns ``max_groups`` itself.

    Raises:
        ValueError: If ``num_channels`` is smaller than 1.
    """
    for groups in range(min(max_groups, num_channels), 0, -1):
        if num_channels % groups == 0:
            return groups
    raise ValueError(
        "num_channels must be a positive integer, got {!r}".format(num_channels)
    )


class outconv(nn.Module):
    """Output head made of 1x1 convolutions mapping ``in_ch`` to ``out_ch``.

    Layout: conv => GroupNorm => LeakyReLU => conv => conv, all bias-free.
    The GroupNorm uses up to 32 groups; when ``out_ch`` is not a multiple
    of 32 the largest smaller divisor of ``out_ch`` is used instead, so any
    positive ``out_ch`` is accepted.
    """

    def __init__(self, in_ch, out_ch):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_ch, out_ch, 1, padding=0, bias=False),
            # Was a fixed 32 groups, which rejected out_ch such as 16 or 48.
            nn.GroupNorm(_gn_groups(out_ch), out_ch),
            nn.LeakyReLU(inplace=True),
            nn.Conv2d(out_ch, out_ch, 1, padding=0, bias=False),
            nn.Conv2d(out_ch, out_ch, 1, padding=0, bias=False)
        )

    def forward(self, x):
        """Project ``x`` to ``out_ch`` channels; spatial size is unchanged."""
        return self.conv(x)


class YNet(nn.Module):
    """Two-branch encoder with a shared decoder and a foreground-mask head.

    One encoder branch takes a 3-channel input (RGB), the other a 2-channel
    input (flow). Features of both branches are concatenated at every
    encoder level and decoded through three ``up`` steps with skip
    connections into an ``n_dim``-channel feature map, from which a
    1-channel sigmoid mask is derived.

    Args:
        n_dim: Channel count of the returned feature map. Any positive
            value is accepted; GroupNorm group counts adapt to it.
    """

    def __init__(self, n_dim=32):
        super().__init__()
        # RGB branch
        self.inc_x = inconv(3, 64)
        self.down1_x = down(64, 128)
        self.down2_x = down(128, 256)
        self.down3_x = down(256, 256)
        # self.down4_x = down(512, 512)
        # Flow branch
        self.inc_f = inconv(2, 64)
        self.down1_f = down(64, 128)
        self.down2_f = down(128, 256)
        self.down3_f = down(256, 256)
        # self.down4_f = down(512, 512)
        # Decoder: the first argument is the channel count after the
        # skip tensor and the upsampled tensor have been concatenated.
        self.up1 = up(1024, 256)
        self.up2 = up(512, 128)
        self.up3 = up(256, 64)
        # self.up4 = up(128, 64)
        self.outc = outconv(64, n_dim)
        # Foreground mask generation
        self.mask_conv = nn.Sequential(
            # Group count adapts to n_dim (32 groups whenever n_dim % 32 == 0).
            nn.GroupNorm(_gn_groups(n_dim), n_dim),
            nn.LeakyReLU(inplace=True),
            nn.Conv2d(n_dim, 16, 1, bias=False),
            nn.LeakyReLU(inplace=True),
            nn.Conv2d(16, 8, 1, bias=False),
            nn.Conv2d(8, 1, 1, bias=False),
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x, f):
        """Compute the feature map and the foreground mask.

        Args:
            x: Input to the RGB branch (3 channels).
            f: Input to the flow branch (2 channels).

        Returns:
            Tuple ``(y, mask)``: ``y`` is the ``n_dim``-channel output of
            the head and ``mask`` is a 1-channel map squashed to (0, 1) by
            a sigmoid.
        """
        # Encoder (trailing numbers are channel counts)
        x1 = self.inc_x(x)  # 64
        x2 = self.down1_x(x1)  # 128
        x3 = self.down2_x(x2)  # 256
        x4 = self.down3_x(x3)  # 256
        f1 = self.inc_f(f)  # 64
        f2 = self.down1_f(f1)  # 128
        f3 = self.down2_f(f2)  # 256
        f4 = self.down3_f(f3)  # 256
        # Mid-level concat of both branches at every resolution
        y1 = torch.cat([x1, f1], dim=1)  # 128
        y2 = torch.cat([x2, f2], dim=1)  # 256
        y3 = torch.cat([x3, f3], dim=1)  # 512
        y4 = torch.cat([x4, f4], dim=1)  # 512
        # Decoder
        y = self.up1(y4, y3)  # 256
        y = self.up2(y, y2)  # 128
        y = self.up3(y, y1)  # 64
        y = self.outc(y)
        # Get Mask
        mask = self.mask_conv(y)
        mask = self.sigmoid(mask)
        return y, mask
if __name__ == "__main__":
    # Smoke test: push one random RGB/flow pair through a default YNet and
    # print the parameter count and the shape of the returned feature map.
    img, flow = torch.rand((1, 3, 320, 512)), torch.rand((1, 2, 320, 512))
    net = YNet()
    # Inference only, so there is no need to record an autograd graph.
    with torch.no_grad():
        feature, mask = net(img, flow)
    # A generator avoids materialising a throwaway list just to sum it.
    print(sum(x.numel() for x in net.parameters()))
    print(feature.shape)