diff --git a/model/attention/PolarizedSelfAttention.py b/model/attention/PolarizedSelfAttention.py deleted file mode 100644 index d85ebd8..0000000 --- a/model/attention/PolarizedSelfAttention.py +++ /dev/null @@ -1,100 +0,0 @@ -""" -MindSpore implementation of 'PolarizedSelfAttention' -Refer to "Polarized Self-Attention: Towards High-quality Pixel-wise Regression" -""" -import mindspore as ms -from mindspore import nn - - -class ParallelPolarizedSelfAttention(nn.Cell): - """ Parallel Polarized Self Attention """ - def __init__(self, channel=512): - super().__init__() - self.ch_wv = nn.Conv2d(channel, channel // 2, kernel_size=(1, 1)) - self.ch_wq = nn.Conv2d(channel, 1, kernel_size=(1, 1)) - self.softmax_channel = nn.Softmax(1) - self.softmax_spatial = nn.Softmax(-1) - self.ch_wz = nn.Conv2d(channel // 2, channel, kernel_size=(1, 1)) - self.ln = nn.LayerNorm((channel,)) - self.sigmoid = nn.Sigmoid() - self.sp_wv = nn.Conv2d(channel, channel // 2, kernel_size=(1, 1)) - self.sp_wq = nn.Conv2d(channel, channel // 2, kernel_size=(1, 1)) - self.agp = nn.AdaptiveAvgPool2d((1, 1)) - - def construct(self, x): - b, c, h, w = x.shape - - # Channel-only Self-Attention - channel_wv = self.ch_wv(x) # bs,c//2,h,w - channel_wq = self.ch_wq(x) # bs,1,h,w - channel_wv = channel_wv.reshape(b, c // 2, -1) # bs,c//2,h*w - channel_wq = channel_wq.reshape(b, -1, 1) # bs,h*w,1 - channel_wq = self.softmax_channel(channel_wq) - channel_wz = ms.ops.matmul(channel_wv, channel_wq).unsqueeze(-1) # bs,c//2,1,1 - channel_weight = self.sigmoid(self.ln(self.ch_wz(channel_wz).reshape(b, c, 1).permute(0, 2, 1))).permute(0, 2, - 1).reshape( - b, c, 1, 1) # bs,c,1,1 - channel_out = channel_weight * x - - # Spatial-only Self-Attention - spatial_wv = self.sp_wv(x) # bs,c//2,h,w - spatial_wq = self.sp_wq(x) # bs,c//2,h,w - spatial_wq = self.agp(spatial_wq) # bs,c//2,1,1 - spatial_wv = spatial_wv.reshape(b, c // 2, -1) # bs,c//2,h*w - spatial_wq = spatial_wq.permute(0, 2, 3, 1).reshape(b, 1, c // 2) # bs,1,c//2 - spatial_wq = self.softmax_spatial(spatial_wq) - spatial_wz = ms.ops.matmul(spatial_wq, spatial_wv) # bs,1,h*w - spatial_weight = self.sigmoid(spatial_wz.reshape(b, 1, h, w)) # bs,1,h,w - spatial_out = spatial_weight * x - out = spatial_out + channel_out - return out - - -class SequentialPolarizedSelfAttention(nn.Cell): - """ Sequential Polarized Self Attention """ - def __init__(self, channel=512): - super().__init__() - self.ch_wv = nn.Conv2d(channel, channel // 2, kernel_size=(1, 1)) - self.ch_wq = nn.Conv2d(channel, 1, kernel_size=(1, 1)) - self.softmax_channel = nn.Softmax(1) - self.softmax_spatial = nn.Softmax(-1) - self.ch_wz = nn.Conv2d(channel // 2, channel, kernel_size=(1, 1)) - self.ln = nn.LayerNorm((channel,)) - self.sigmoid = nn.Sigmoid() - self.sp_wv = nn.Conv2d(channel, channel // 2, kernel_size=(1, 1)) - self.sp_wq = nn.Conv2d(channel, channel // 2, kernel_size=(1, 1)) - self.agp = nn.AdaptiveAvgPool2d((1, 1)) - - def construct(self, x): - b, c, h, w = x.shape - - # Channel-only Self-Attention - channel_wv = self.ch_wv(x) # bs,c//2,h,w - channel_wq = self.ch_wq(x) # bs,1,h,w - channel_wv = channel_wv.reshape(b, c // 2, -1) # bs,c//2,h*w - channel_wq = channel_wq.reshape(b, -1, 1) # bs,h*w,1 - channel_wq = self.softmax_channel(channel_wq) - channel_wz = ms.ops.matmul(channel_wv, channel_wq).unsqueeze(-1) # bs,c//2,1,1 - channel_weight = self.sigmoid(self.ln(self.ch_wz(channel_wz).reshape(b, c, 1).permute(0, 2, 1))).permute(0, 2, - 1).reshape( - b, c, 1, 1) # bs,c,1,1 - channel_out = channel_weight * x - - # Spatial-only Self-Attention - spatial_wv = self.sp_wv(channel_out) # bs,c//2,h,w - spatial_wq = self.sp_wq(channel_out) # bs,c//2,h,w - spatial_wq = self.agp(spatial_wq) # bs,c//2,1,1 - spatial_wv = spatial_wv.reshape(b, c // 2, -1) # bs,c//2,h*w - spatial_wq = spatial_wq.permute(0, 2, 3, 1).reshape(b, 1, c // 2) # bs,1,c//2 - spatial_wq = self.softmax_spatial(spatial_wq) - spatial_wz = ms.ops.matmul(spatial_wq, spatial_wv) # bs,1,h*w - spatial_weight = self.sigmoid(spatial_wz.reshape(b, 1, h, w)) # bs,1,h,w - spatial_out = spatial_weight * channel_out - return spatial_out - - -if __name__ == '__main__': - dummy_input = ms.ops.randn((1, 512, 7, 7)) - psa = SequentialPolarizedSelfAttention(channel=512) - output = psa(dummy_input) - print(output.shape)