# OpenModelZoo / wxd-yolox
#
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =======================================================================================
""" yolox pa-fpn module """
import mindspore.nn as nn
from mindspore.ops import operations as P

from src.darknet import CSPDarknet
from src.network_blocks import BaseConv, CSPLayer, DWConv


class YOLOPAFPN(nn.Cell):
    """
    PA-FPN neck for YOLOX, wrapped around a CSPDarknet backbone.

    The cell runs the backbone, fuses its three feature maps along a
    top-down path (1x1 conv, nearest-neighbour resize, concat, CSP layer)
    and then along a bottom-up path (Conv block, concat, CSP layer), and
    returns the three fused maps.

    Args:
        input_w: network input width; used only to derive the fixed
            target sizes of the two resize ops.
        input_h: network input height; used the same way as ``input_w``.
        depth: depth multiplier, forwarded to the backbone; every CSP layer
            here receives ``round(3 * depth)`` as its third argument.
        width: multiplier applied to each entry of ``in_channels``.
        in_features: stored on the cell as ``self.in_features``; it is not
            read anywhere else in this class.
        in_channels: three base channel counts, lowest level first.
            Defaults to ``[256, 512, 1024]`` when ``None``.
        depthwise: when true, the two bottom-up convs are ``DWConv`` instead
            of ``BaseConv``; also forwarded to the backbone and CSP layers.
        act: activation name forwarded to every sub-cell.
    """

    def __init__(
            self,
            input_w,
            input_h,
            depth=1.0,
            width=1.0,
            in_features=("dark3", "dark4", "dark5"),
            in_channels=None,
            depthwise=False,
            act="silu"
    ):
        super().__init__()
        in_channels = [256, 512, 1024] if in_channels is None else in_channels

        self.input_w = input_w
        self.input_h = input_h
        self.backbone = CSPDarknet(depth, width, depthwise=depthwise, act=act)
        self.in_features = in_features
        self.in_channels = in_channels

        # The bottom-up downsampling convs are the only layers whose type
        # depends on `depthwise`; the 1x1 lateral convs are always BaseConv.
        bottom_up_conv = DWConv if depthwise else BaseConv

        # Parameter-free ops. The resize targets are fixed at construction
        # time from the declared input resolution (1/16 and 1/8 of it).
        self.upsample0 = P.ResizeNearestNeighbor((input_h // 16, input_w // 16))
        self.upsample1 = P.ResizeNearestNeighbor((input_h // 8, input_w // 8))
        self.concat = P.Concat(axis=1)

        # Scaled channel widths, evaluated once with the same expressions
        # the layers need.
        low_ch = int(in_channels[0] * width)
        mid_ch = int(in_channels[1] * width)
        high_ch = int(in_channels[2] * width)
        low_cat_ch = int(2 * in_channels[0] * width)
        mid_cat_ch = int(2 * in_channels[1] * width)
        csp_depth = round(3 * depth)

        # ---- top-down path ----
        self.lateral_conv0 = BaseConv(high_ch, mid_ch, 1, 1, act=act)
        self.C3_p4 = CSPLayer(mid_cat_ch, mid_ch, csp_depth, False, depthwise=depthwise, act=act)
        self.reduce_conv1 = BaseConv(mid_ch, low_ch, 1, 1, act=act)
        self.C3_p3 = CSPLayer(low_cat_ch, low_ch, csp_depth, False, depthwise=depthwise, act=act)

        # ---- bottom-up path ----
        self.bu_conv2 = bottom_up_conv(low_ch, low_ch, 3, 2, act=act)
        self.C3_n3 = CSPLayer(low_cat_ch, mid_ch, csp_depth, False, depthwise=depthwise, act=act)
        self.bu_conv1 = bottom_up_conv(mid_ch, mid_ch, 3, 2, act=act)
        self.C3_n4 = CSPLayer(mid_cat_ch, high_ch, csp_depth, False, depthwise=depthwise, act=act)

    def construct(self, inputs):
        """
        Run the backbone and fuse its three outputs.

        Args:
            inputs: input images.

        Returns:
            Tuple[Tensor]: three fused feature maps, ordered from the
            lowest pyramid level to the highest (the same order in which
            the backbone outputs are unpacked).
        """
        # The backbone is expected to yield exactly three maps, lowest level
        # first. NOTE(review): presumably strides 8 / 16 / 32 - confirm
        # against CSPDarknet.
        feat_low, feat_mid, feat_high = self.backbone(inputs)

        # Top-down: squeeze the highest level, resize it to
        # (input_h // 16, input_w // 16) and merge with the middle level.
        lateral_high = self.lateral_conv0(feat_high)
        merged_mid = self.concat((self.upsample0(lateral_high), feat_mid))
        merged_mid = self.C3_p4(merged_mid)

        # Same again one level down, resizing to (input_h // 8, input_w // 8).
        lateral_mid = self.reduce_conv1(merged_mid)
        merged_low = self.concat((self.upsample1(lateral_mid), feat_low))
        out_low = self.C3_p3(merged_low)

        # Bottom-up: pass each output through a bottom-up conv and merge it
        # with the lateral tensor saved at that level on the way down.
        down_low = self.bu_conv2(out_low)
        out_mid = self.C3_n3(self.concat((down_low, lateral_mid)))

        down_mid = self.bu_conv1(out_mid)
        out_high = self.C3_n4(self.concat((down_mid, lateral_high)))

        return out_low, out_mid, out_high