geniuspatrick
/
mindcv-2023

 
			
							"""
MindSpore implementation of `VGGNet`.
Refer to SqueezeNet: Very Deep Convolutional Networks for Large-Scale Image Recognition.
"""

import math
from typing import Dict, List, Union

import mindspore.common.initializer as init
from mindspore import Tensor, nn

from .registry import register_model
from .utils import load_pretrained

__all__ = [
    "VGG",
    "vgg11",
    "vgg13",
    "vgg16",
    "vgg19",
]


def _cfg(url="", **kwargs):
    return {
        "url": url,
        "num_classes": 1000,
        "first_conv": "features.0",
        "classifier": "classifier.6",
        **kwargs,
    }


default_cfgs = {
    "vgg11": _cfg(url=""),
    "vgg13": _cfg(url=""),
    "vgg16": _cfg(url="https://download.mindspore.cn/toolkits/mindcv/vgg/vgg16_224.ckpt"),
    "vgg19": _cfg(url="https://download.mindspore.cn/toolkits/mindcv/vgg/vgg19_224.ckpt"),
}


cfgs: Dict[str, List[Union[str, int]]] = {
    "vgg11": [64, "M", 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
    "vgg13": [64, 64, "M", 128, 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
    "vgg16": [64, 64, "M", 128, 128, "M", 256, 256, 256, "M", 512, 512, 512, "M", 512, 512, 512, "M"],
    "vgg19": [64, 64, "M", 128, 128, "M", 256, 256, 256, 256, "M", 512, 512, 512, 512, "M", 512, 512, 512, 512, "M"],
}


def _make_layers(
    cfg: List[Union[str, int]],
    batch_norm: bool = False,
    in_channels: int = 3,
) -> nn.SequentialCell:
    """define the basic block of VGG"""
    layers = []
    for v in cfg:
        if v == "M":
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, pad_mode="pad", padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU()]
            else:
                layers += [conv2d, nn.ReLU()]
            in_channels = v

    return nn.SequentialCell(layers)


class VGG(nn.Cell):
    r"""VGGNet model class, based on
    `"Very Deep Convolutional Networks for Large-Scale Image Recognition" <https://arxiv.org/abs/1409.1556>`_

    Args:
        model_name: name of the architecture. 'vgg11', 'vgg13', 'vgg16' or 'vgg19'.
        batch_norm: use batch normalization or not. Default: False.
        num_classes: number of classification classes. Default: 1000.
        in_channels: number the channels of the input. Default: 3.
        drop_rate: dropout rate of the classifier. Default: 0.5.
    """

    def __init__(
        self,
        model_name: str,
        batch_norm: bool = False,
        num_classes: int = 1000,
        in_channels: int = 3,
        drop_rate: float = 0.5,
    ) -> None:
        super().__init__()
        cfg = cfgs[model_name]
        self.features = _make_layers(cfg, batch_norm=batch_norm, in_channels=in_channels)
        self.flatten = nn.Flatten()
        self.classifier = nn.SequentialCell([
            nn.Dense(512 * 7 * 7, 4096),
            nn.ReLU(),
            nn.Dropout(keep_prob=1 - drop_rate),
            nn.Dense(4096, 4096),
            nn.ReLU(),
            nn.Dropout(keep_prob=1 - drop_rate),
            nn.Dense(4096, num_classes),
        ])
        self._initialize_weights()

    def _initialize_weights(self) -> None:
        """Initialize weights for cells."""
        for _, cell in self.cells_and_names():
            if isinstance(cell, nn.Conv2d):
                cell.weight.set_data(
                    init.initializer(init.HeNormal(math.sqrt(5), mode="fan_out", nonlinearity="relu"),
                                     cell.weight.shape, cell.weight.dtype))
                if cell.bias is not None:
                    cell.bias.set_data(
                        init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
            elif isinstance(cell, nn.Dense):
                cell.weight.set_data(
                    init.initializer(init.Normal(0.01), cell.weight.shape, cell.weight.dtype))
                if cell.bias is not None:
                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))

    def forward_features(self, x: Tensor) -> Tensor:
        x = self.features(x)
        return x

    def forward_head(self, x: Tensor) -> Tensor:
        x = self.flatten(x)
        x = self.classifier(x)
        return x

    def construct(self, x: Tensor) -> Tensor:
        x = self.forward_features(x)
        x = self.forward_head(x)
        return x


@register_model
def vgg11(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VGG:
    """Get 11 layers VGG model.
    Refer to the base class `models.VGG` for more details.
    """
    default_cfg = default_cfgs["vgg11"]
    model = VGG(model_name="vgg11", num_classes=num_classes, in_channels=in_channels, **kwargs)

    if pretrained:
        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)

    return model


@register_model
def vgg13(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VGG:
    """Get 13 layers VGG model.
    Refer to the base class `models.VGG` for more details.
    """
    default_cfg = default_cfgs["vgg13"]
    model = VGG(model_name="vgg13", num_classes=num_classes, in_channels=in_channels, **kwargs)

    if pretrained:
        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)

    return model


@register_model
def vgg16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VGG:
    """Get 16 layers VGG model.
    Refer to the base class `models.VGG` for more details.
    """
    default_cfg = default_cfgs["vgg16"]
    model = VGG(model_name="vgg16", num_classes=num_classes, in_channels=in_channels, **kwargs)

    if pretrained:
        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)

    return model


@register_model
def vgg19(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VGG:
    """Get 19 layers VGG model.
    Refer to the base class `models.VGG` for more details.
    """
    default_cfg = default_cfgs["vgg19"]
    model = VGG(model_name="vgg19", num_classes=num_classes, in_channels=in_channels, **kwargs)

    if pretrained:
        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)

    return model