# pylint: disable=E0401
"""
MindSpore implementation of `ConvNextV2`.
Refer to: ConvNeXt V2: Co-designing and Scaling ConvNets with Masked Autoencoders.
"""
import mindspore as ms
from mindspore import nn

from model.layers import DropPath


class Block(nn.Cell):
    """ConvNeXtV2 block: depthwise conv -> LayerNorm -> pointwise expand -> GELU -> GRN -> pointwise project, with a residual connection."""

    def __init__(self, dim, drop_path=0.):
        super().__init__()
        # Depthwise 7x7 convolution (one group per channel). `has_bias=True` matches
        # the reference implementation; MindSpore's Conv2d defaults to no bias.
        self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, pad_mode='pad', padding=3,
                                group=dim, has_bias=True)
        self.norm = LayerNorm(dim, eps=1e-6, data_format="channels_last")
        self.pwconv1 = nn.Dense(dim, 4 * dim)  # pointwise conv as a linear layer
        self.act = nn.GELU()
        self.grn = GRN(4 * dim)
        self.pwconv2 = nn.Dense(4 * dim, dim)
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()

    def construct(self, x):
        y = x
        x = self.dwconv(x)
        x = x.permute(0, 2, 3, 1)  # NCHW -> NHWC for LayerNorm/Dense
        x = self.norm(x)
        x = self.pwconv1(x)
        x = self.act(x)
        x = self.grn(x)
        x = self.pwconv2(x)
        x = x.permute(0, 3, 1, 2)  # NHWC -> NCHW
        x = y + self.drop_path(x)
        return x
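

# A minimal shape sanity check for Block, included as an illustrative sketch
# (not part of the reference implementation); it assumes MindSpore 2.x, where
# ops.randn and Tensor.permute are available.
def _block_shape_check():
    blk = Block(dim=64)
    out = blk(ms.ops.randn((2, 64, 56, 56)))
    # The 7x7 depthwise conv uses stride 1 with padding 3, and the residual is
    # added back in NCHW, so the block preserves the input shape.
    assert out.shape == (2, 64, 56, 56)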


class LayerNorm(nn.Cell):
    """LayerNorm supporting two data formats: channels_last (N, H, W, C) and channels_first (N, C, H, W)."""

    def __init__(self, normalized_shape, eps=1e-6, data_format="channels_first"):
        super().__init__()
        if data_format not in ("channels_last", "channels_first"):
            raise NotImplementedError(f"unsupported data_format: {data_format}")
        self.eps = eps
        self.data_format = data_format
        self.normalized_shape = (normalized_shape,)
        if self.data_format == "channels_first":
            # Affine parameters for the hand-written channel-axis normalization.
            self.weight = ms.Parameter(ms.ops.ones(normalized_shape))
            self.bias = ms.Parameter(ms.ops.zeros(normalized_shape))
            self.layer_norm = None
        else:
            # The built-in LayerNorm handles channels_last directly.
            self.layer_norm = nn.LayerNorm(self.normalized_shape, gamma_init='ones',
                                           beta_init='zeros', epsilon=self.eps)

    def construct(self, x):
        if self.data_format == "channels_last":
            return self.layer_norm(x)
        # channels_first: normalize over the channel axis (dim 1) manually.
        u = x.mean(1, keep_dims=True)
        s = (x - u).pow(2).mean(1, keep_dims=True)
        x = (x - u) / ms.ops.sqrt(s + self.eps)
        x = self.weight[:, None, None] * x + self.bias[:, None, None]
        return x
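

# An equivalence sketch for the two data formats (illustrative, not part of the
# reference implementation): at initialization both paths compute the same
# normalization, up to a transpose and numerical tolerance.
def _layernorm_format_check():
    ln_cf = LayerNorm(16, data_format="channels_first")
    ln_cl = LayerNorm(16, data_format="channels_last")
    x = ms.ops.randn((2, 16, 4, 4))
    a = ln_cf(x)
    b = ln_cl(x.permute(0, 2, 3, 1)).permute(0, 3, 1, 2)
    assert ms.ops.isclose(a, b, atol=1e-5).all()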


class GRN(nn.Cell):
    """GRN (Global Response Normalization) layer, operating on channels_last inputs (N, H, W, C)."""

    def __init__(self, dim):
        super().__init__()
        # Zero-initialized affine parameters: the layer starts as an identity mapping.
        self.gamma = ms.Parameter(ms.ops.zeros((1, 1, 1, dim)))
        self.beta = ms.Parameter(ms.ops.zeros((1, 1, 1, dim)))

    def construct(self, x):
        # Global feature aggregation: per-channel L2 norm over the spatial dims.
        # The default ord gives the Frobenius (element-wise L2) norm, matching
        # torch.norm(..., p=2) in the reference code; ord=2 would instead request
        # a spectral matrix norm over the 2-D dim pair.
        Gx = ms.ops.norm(x, dim=(1, 2), keepdim=True)
        # Divisive normalization across channels.
        Nx = Gx / (Gx.mean(axis=-1, keep_dims=True) + 1e-6)
        # Learnable affine plus the identity shortcut.
        return self.gamma * (x * Nx) + self.beta + x
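

# GRN identity-at-init sketch (illustrative, not part of the reference
# implementation): with gamma = beta = 0, the layer reduces to a passthrough,
# so freshly initialized blocks start close to plain ConvNeXt blocks.
def _grn_init_check():
    grn = GRN(dim=8)
    x = ms.ops.randn((2, 7, 7, 8))
    assert ms.ops.isclose(grn(x), x).all()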


class ConvNextV2(nn.Cell):
    """ConvNeXtV2: a 4x patchify stem, then four stages of Blocks, each preceded by a downsampling layer."""

    def __init__(self, in_channels=3, num_classes=1000,
                 depth=None, dims=None,
                 drop_path_rate=0., head_init_scale=1.):
        super().__init__()
        if depth is None:
            depth = [3, 3, 9, 3]
        if dims is None:
            dims = [96, 192, 384, 768]
        self.depth = depth

        # Stem (4x4 conv, stride 4) plus three 2x downsampling layers.
        self.downsample_layers = nn.SequentialCell()
        stem = nn.SequentialCell(
            nn.Conv2d(in_channels, dims[0], kernel_size=4, stride=4, has_bias=True),
            LayerNorm(dims[0], eps=1e-6, data_format="channels_first")
        )
        self.downsample_layers.append(stem)
        for i in range(3):
            downsample_layer = nn.SequentialCell(
                LayerNorm(dims[i], eps=1e-6, data_format="channels_first"),
                nn.Conv2d(dims[i], dims[i + 1], kernel_size=2, stride=2, has_bias=True)
            )
            self.downsample_layers.append(downsample_layer)

        # Four feature stages with a linearly increasing stochastic-depth rate.
        # Convert to Python floats so each Block receives a plain scalar.
        dp_rates = ms.ops.linspace(0, drop_path_rate, sum(depth)).asnumpy().tolist()
        self.stages = nn.SequentialCell()
        cur = 0
        for i in range(4):
            stage = nn.SequentialCell(
                *[Block(dim=dims[i], drop_path=dp_rates[cur + j]) for j in range(depth[i])]
            )
            self.stages.append(stage)
            cur += depth[i]

        self.norm = nn.LayerNorm((dims[-1],), epsilon=1e-6)  # final norm before the head
        self.head = nn.Dense(dims[-1], num_classes)
        # Rescale the classifier at init, as in the reference implementation.
        self.head.weight.set_data(self.head.weight.data * head_init_scale)
        if self.head.bias is not None:
            self.head.bias.set_data(self.head.bias.data * head_init_scale)

    def construct(self, x):
        for i in range(4):
            x = self.downsample_layers[i](x)
            x = self.stages[i](x)
        # Global average pooling (N, C, H, W) -> (N, C), then norm and classify.
        x = self.norm(x.mean([-2, -1]))
        x = self.head(x)
        return x
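

# A small inspection helper, added as an illustrative sketch (not part of the
# reference implementation). It relies only on MindSpore's standard
# Cell.trainable_params() accessor and the Parameter.size element count.
def count_parameters(model: nn.Cell) -> int:
    """Return the number of trainable parameters in `model`."""
    return sum(p.size for p in model.trainable_params())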


# Variant configurations follow the official ConvNeXt V2 model zoo:
# atto/femto/pico use depth [2, 2, 6, 2], nano [2, 2, 8, 2],
# tiny [3, 3, 9, 3], and base/large/huge [3, 3, 27, 3].
def convnextv2_atto(**kwargs):
    """ConvNextV2-atto"""
    model = ConvNextV2(depth=[2, 2, 6, 2], dims=[40, 80, 160, 320], **kwargs)
    return model


def convnextv2_femto(**kwargs):
    """ConvNextV2-femto"""
    model = ConvNextV2(depth=[2, 2, 6, 2], dims=[48, 96, 192, 384], **kwargs)
    return model


def convnextv2_pico(**kwargs):
    """ConvNextV2-pico"""
    model = ConvNextV2(depth=[2, 2, 6, 2], dims=[64, 128, 256, 512], **kwargs)
    return model


def convnextv2_nano(**kwargs):
    """ConvNextV2-nano"""
    model = ConvNextV2(depth=[2, 2, 8, 2], dims=[80, 160, 320, 640], **kwargs)
    return model


def convnextv2_tiny(**kwargs):
    """ConvNextV2-tiny"""
    model = ConvNextV2(depth=[3, 3, 9, 3], dims=[96, 192, 384, 768], **kwargs)
    return model


def convnextv2_base(**kwargs):
    """ConvNextV2-base"""
    model = ConvNextV2(depth=[3, 3, 27, 3], dims=[128, 256, 512, 1024], **kwargs)
    return model


def convnextv2_large(**kwargs):
    """ConvNextV2-large"""
    model = ConvNextV2(depth=[3, 3, 27, 3], dims=[192, 384, 768, 1536], **kwargs)
    return model


def convnextv2_huge(**kwargs):
    """ConvNextV2-huge"""
    model = ConvNextV2(depth=[3, 3, 27, 3], dims=[352, 704, 1408, 2816], **kwargs)
    return model


if __name__ == "__main__":
    convnextv2 = convnextv2_atto()
    dummy_input = ms.ops.randn((1, 3, 224, 224))
    output = convnextv2(dummy_input)
    print(output.shape)
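    # Hypothetical extra report using the count_parameters sketch above.
    print(f"trainable parameters: {count_parameters(convnextv2):,}")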