forked from MenghaoGuo/Awesome-Vision-Attentions
Commit
Merge pull request MenghaoGuo#20 from uyzhang/main
update
Showing 3 changed files with 182 additions and 0 deletions.
# Improving convolutional networks with self-calibrated convolutions (CVPR 2020)
import jittor as jt
from jittor import nn


class SCConv(nn.Module):
    def __init__(self, inplanes, planes, stride, padding, dilation, groups, pooling_r, norm_layer):
        super(SCConv, self).__init__()
        # k2: down-sample by pooling_r, convolve, then up-sample to build the calibration gate
        self.k2 = nn.Sequential(
            nn.AvgPool2d(kernel_size=pooling_r, stride=pooling_r),
            nn.Conv2d(inplanes, planes, kernel_size=3, stride=1,
                      padding=padding, dilation=dilation,
                      groups=groups, bias=False),
            norm_layer(planes),
        )
        # k3: feature transform at the original resolution
        self.k3 = nn.Sequential(
            nn.Conv2d(inplanes, planes, kernel_size=3, stride=1,
                      padding=padding, dilation=dilation,
                      groups=groups, bias=False),
            norm_layer(planes),
        )
        # k4: output convolution applied to the calibrated features
        self.k4 = nn.Sequential(
            nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride,
                      padding=padding, dilation=dilation,
                      groups=groups, bias=False),
            norm_layer(planes),
        )

    def execute(self, x):
        identity = x

        out = jt.sigmoid(jt.add(identity, nn.interpolate(
            self.k2(x), identity.size()[2:])))  # sigmoid(identity + k2)
        out = jt.multiply(self.k3(x), out)  # k3 * sigmoid(identity + k2)
        out = self.k4(out)  # k4

        return out


def main():
    attention_block = SCConv(64, 64, stride=1,
                             padding=2, dilation=2, groups=1, pooling_r=4, norm_layer=nn.BatchNorm2d)
    input = jt.rand([4, 64, 32, 32])
    output = attention_block(input)
    print(input.size(), output.size())


if __name__ == '__main__':
    main()
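In SCNet (the paper above), SCConv sits inside a residual bottleneck whose channels are split into two branches, with only one branch passing through the self-calibrated convolution. The following is a minimal, illustrative wrapper written under that assumption; the class name SimpleSCBlock and its exact layer layout are made up here for demonstration and are not part of this repository.

class SimpleSCBlock(nn.Module):
    # Illustrative only: split the channels into two halves, run one half
    # through a plain 3x3 conv and the other through SCConv, then fuse the
    # two branches with a 1x1 conv and add a residual connection.
    def __init__(self, channels, pooling_r=4):
        super(SimpleSCBlock, self).__init__()
        half = channels // 2
        self.conv_a = nn.Sequential(
            nn.Conv2d(half, half, 3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(half),
            nn.ReLU())
        self.conv_b = SCConv(half, half, stride=1, padding=1, dilation=1,
                             groups=1, pooling_r=pooling_r, norm_layer=nn.BatchNorm2d)
        self.fuse = nn.Conv2d(channels, channels, 1, bias=False)

    def execute(self, x):
        half = x.shape[1] // 2
        a = self.conv_a(x[:, :half])   # plain branch
        b = self.conv_b(x[:, half:])   # self-calibrated branch
        return nn.relu(x + self.fuse(jt.concat([a, b], dim=1)))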
# Strip Pooling: Rethinking spatial pooling for scene parsing (CVPR 2020)
import jittor as jt
from jittor import nn


class StripPooling(nn.Module):
    """
    Reference: Strip Pooling: Rethinking Spatial Pooling for Scene Parsing (CVPR 2020)
    """

    def __init__(self, in_channels, pool_size, norm_layer, up_kwargs):
        super(StripPooling, self).__init__()
        # Pyramid pooling branch: two square adaptive pooling sizes
        self.pool1 = nn.AdaptiveAvgPool2d(pool_size[0])
        self.pool2 = nn.AdaptiveAvgPool2d(pool_size[1])
        # Strip pooling branch: collapse one spatial dimension at a time
        self.pool3 = nn.AdaptiveAvgPool2d((1, None))
        self.pool4 = nn.AdaptiveAvgPool2d((None, 1))

        inter_channels = int(in_channels/4)
        self.conv1_1 = nn.Sequential(nn.Conv2d(in_channels, inter_channels, 1, bias=False),
                                     norm_layer(inter_channels),
                                     nn.ReLU())
        self.conv1_2 = nn.Sequential(nn.Conv2d(in_channels, inter_channels, 1, bias=False),
                                     norm_layer(inter_channels),
                                     nn.ReLU())
        self.conv2_0 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, 3, 1, 1, bias=False),
                                     norm_layer(inter_channels))
        self.conv2_1 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, 3, 1, 1, bias=False),
                                     norm_layer(inter_channels))
        self.conv2_2 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, 3, 1, 1, bias=False),
                                     norm_layer(inter_channels))
        self.conv2_3 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, (1, 3), 1, (0, 1), bias=False),
                                     norm_layer(inter_channels))
        self.conv2_4 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, (3, 1), 1, (1, 0), bias=False),
                                     norm_layer(inter_channels))
        self.conv2_5 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, 3, 1, 1, bias=False),
                                     norm_layer(inter_channels),
                                     nn.ReLU())
        self.conv2_6 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, 3, 1, 1, bias=False),
                                     norm_layer(inter_channels),
                                     nn.ReLU())
        self.conv3 = nn.Sequential(nn.Conv2d(inter_channels*2, in_channels, 1, bias=False),
                                   norm_layer(in_channels))
        # bilinear interpolation options
        self._up_kwargs = up_kwargs

    def execute(self, x):
        _, _, h, w = x.size()
        x1 = self.conv1_1(x)
        x2 = self.conv1_2(x)
        # Pyramid pooling branch
        x2_1 = self.conv2_0(x1)
        x2_2 = nn.interpolate(self.conv2_1(self.pool1(x1)),
                              (h, w), **self._up_kwargs)
        x2_3 = nn.interpolate(self.conv2_2(self.pool2(x1)),
                              (h, w), **self._up_kwargs)
        # Strip pooling branch: horizontal (1 x W) and vertical (H x 1) strips
        x2_4 = nn.interpolate(self.conv2_3(self.pool3(x2)),
                              (h, w), **self._up_kwargs)
        x2_5 = nn.interpolate(self.conv2_4(self.pool4(x2)),
                              (h, w), **self._up_kwargs)
        x1 = self.conv2_5(nn.relu(x2_1 + x2_2 + x2_3))
        x2 = self.conv2_6(nn.relu(x2_5 + x2_4))
        out = self.conv3(jt.concat([x1, x2], dim=1))
        return nn.relu(x + out)


def main():
    attention_block = StripPooling(
        64, (20, 12), nn.BatchNorm2d, {'mode': 'bilinear', 'align_corners': True})
    input = jt.rand([4, 64, 32, 32])
    output = attention_block(input)
    print(input.size(), output.size())


if __name__ == '__main__':
    main()
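The core of strip pooling is to average over whole rows and whole columns (H x 1 and 1 x W strips), expand the strips back to H x W, and use them to modulate the input, which gives every position a long-range, band-shaped context. The module above mixes that with a conventional pooling pyramid; the sketch below isolates just the strip part. MinimalStripPool is an illustrative name and simplification, not code from this repository.

class MinimalStripPool(nn.Module):
    # Illustrative core of strip pooling (not the full mixed module above):
    # pool to an H x 1 strip and a 1 x W strip, refine each with a 1-D conv,
    # expand both back to H x W, and gate the input with their fusion.
    def __init__(self, channels):
        super(MinimalStripPool, self).__init__()
        self.pool_h = nn.AdaptiveAvgPool2d((None, 1))  # H x 1 strip
        self.pool_w = nn.AdaptiveAvgPool2d((1, None))  # 1 x W strip
        self.conv_h = nn.Conv2d(channels, channels, (3, 1), 1, (1, 0), bias=False)
        self.conv_w = nn.Conv2d(channels, channels, (1, 3), 1, (0, 1), bias=False)
        self.fuse = nn.Conv2d(channels, channels, 1, bias=False)

    def execute(self, x):
        _, _, h, w = x.size()
        sh = nn.interpolate(self.conv_h(self.pool_h(x)), (h, w), mode='bilinear')
        sw = nn.interpolate(self.conv_w(self.pool_w(x)), (h, w), mode='bilinear')
        return x * jt.sigmoid(self.fuse(nn.relu(sh + sw)))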
# Beyond Self-attention: External Attention using Two Linear Layers for Visual Tasks (CVMJ 2021)
import jittor as jt
from jittor import nn


class External_attention(nn.Module):
    '''
    Arguments:
        c (int): The input and output channel number.
    '''

    def __init__(self, c):
        super(External_attention, self).__init__()

        self.conv1 = nn.Conv2d(c, c, 1)

        # k: number of external memory units
        self.k = 64
        self.linear_0 = nn.Conv1d(c, self.k, 1, bias=False)

        self.linear_1 = nn.Conv1d(self.k, c, 1, bias=False)
        # Initialize the value memory as the transpose of the key memory
        self.linear_1.weight = self.linear_0.weight.permute(1, 0, 2)

        self.conv2 = nn.Sequential(
            nn.Conv2d(c, c, 1, bias=False),
            nn.BatchNorm(c))

        self.relu = nn.ReLU()

    def execute(self, x):
        idn = x
        x = self.conv1(x)

        b, c, h, w = x.size()
        n = h*w
        x = x.view(b, c, n)  # b * c * n

        attn = self.linear_0(x)  # b, k, n
        attn = nn.softmax(attn, dim=-1)  # b, k, n

        # Double normalization: softmax over n, then l1-normalization over k
        attn = attn / (1e-9 + attn.sum(dim=1, keepdims=True))  # b, k, n
        x = self.linear_1(attn)  # b, c, n

        x = x.view(b, c, h, w)
        x = self.conv2(x)
        x = x + idn
        x = self.relu(x)
        return x


def main():
    attention_block = External_attention(64)
    input = jt.rand([4, 64, 32, 32])
    output = attention_block(input)
    print(input.size(), output.size())


if __name__ == '__main__':
    main()
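External attention replaces query-key self-attention with attention against two small learnable memories; above, the two 1x1 Conv1d layers play the roles of the key memory M_k and the value memory M_v, so the cost grows linearly with the number of pixels n instead of quadratically. The sketch below restates the same computation with explicit linear layers to make that reading visible. ExternalAttentionMatmul, mk, and mv are illustrative names, and the sketch assumes nn.Linear broadcasts over leading dimensions as in PyTorch.

class ExternalAttentionMatmul(nn.Module):
    # Illustrative restatement of the module above: attention between the n
    # pixel features and k learnable memory units, costing O(n * k) rather
    # than the O(n^2) of standard self-attention.
    def __init__(self, c, k=64):
        super(ExternalAttentionMatmul, self).__init__()
        self.mk = nn.Linear(c, k, bias=False)  # key memory M_k
        self.mv = nn.Linear(k, c, bias=False)  # value memory M_v

    def execute(self, x):
        b, c, h, w = x.size()
        feat = x.view(b, c, h*w).permute(0, 2, 1)  # b, n, c
        attn = nn.softmax(self.mk(feat), dim=1)    # b, n, k; softmax over n
        attn = attn / (1e-9 + attn.sum(dim=2, keepdims=True))  # l1-norm over k
        out = self.mv(attn)                        # b, n, c
        return out.permute(0, 2, 1).view(b, c, h, w)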