despacito
/
test_efd

 
			
							# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""common utils"""
import math
from typing import Union
from mindspore.ops import composite as C
from mindspore.ops import operations as P
from mindspore.ops import functional as F
import mindspore as ms
import mindspore.nn as nn
from mindspore.common import dtype as mstype
import mindspore.common.initializer as weight_init
import numpy as np
import cv2
from src.nms import batched_nms

# Clipping mode handed to `clip_grad` by the training cell; per `_clip_grad`,
# 0 clips by value and 1 clips by norm.
GRADIENT_CLIP_TYPE = 1
# Clipping magnitude handed to `clip_grad` together with the mode above.
GRADIENT_CLIP_VALUE = 1.0
# Type-dispatched function graph; `_clip_grad` is registered on it below.
clip_grad = C.MultitypeFuncGraph("clip_grad")
# Type-dispatched function graph; `gradient_scale` is registered on it below.
grad_scale = ms.ops.MultitypeFuncGraph("grad_scale")

@grad_scale.register("Tensor", "Tensor")
def gradient_scale(scale, grad):
    """
    Scale a gradient.

    Inputs:
        scale (Tensor): Scale factor; it is cast to the dtype of `grad` before use.
        grad (Tensor): Gradient to scale.

    Outputs:
        Tensor, `grad` multiplied by the cast scale factor.
    """
    grad_dtype = ms.ops.dtype(grad)
    typed_scale = ms.ops.cast(scale, grad_dtype)
    return grad * typed_scale


@clip_grad.register("Number", "Number", "Tensor")
def _clip_grad(clip_type, clip_value, grad):
    """
    Clip one gradient tensor.

    Inputs:
        clip_type (int): The way to clip, 0 for 'value', 1 for 'norm'.
            Any other value leaves the gradient untouched.
        clip_value (float): Specifies how much to clip.
        grad (Tensor): Gradient to clip.

    Outputs:
        Tensor, the clipped gradient.
    """
    if clip_type not in (0, 1):
        return grad

    grad_dtype = F.dtype(grad)
    # Both modes use the positive bound, expressed in the gradient's dtype.
    upper_bound = F.cast(F.tuple_to_array((clip_value,)), grad_dtype)
    if clip_type == 1:
        return nn.ClipByNorm()(grad, upper_bound)

    lower_bound = F.cast(F.tuple_to_array((-clip_value,)), grad_dtype)
    return C.clip_by_value(grad, lower_bound, upper_bound)


class EfficientDetTrainOneStepCell(nn.TrainOneStepCell):
    """
    Encapsulation class of EfficientDet network training.

    Wraps the training network and its optimizer so that one call to
    `construct` computes the loss, the gradients (optionally clipped) and
    applies the optimizer update.

    Args:
        network (Cell): The training network. Note that loss function should have been added.
        optimizer (Optimizer): Optimizer for updating the weights.
        sens (Number): The adjust parameter. Default: 1.0.
        enable_clip_grad (boolean): If True, clip gradients with `clip_grad`
            using GRADIENT_CLIP_TYPE and GRADIENT_CLIP_VALUE. Default: True.
    """

    def __init__(self, network, optimizer, sens=1.0, enable_clip_grad=True):
        super().__init__(network, optimizer, sens)
        self.enable_clip_grad = enable_clip_grad
        self.hyper_map = C.HyperMap()
        self.cast = P.Cast()

    def set_sens(self, value):
        """Replace the sensitivity value fed to the backward pass."""
        self.sens = value

    def construct(self, x, y):
        """Run one training step on (x, y) and return the loss."""
        # self.weights, self.grad, self.grad_reducer and self.optimizer are not
        # set in this class; presumably they come from nn.TrainOneStepCell.
        loss = self.network(x, y)

        sens_tensor = self.cast(F.tuple_to_array((self.sens,)), mstype.float32)
        grads = self.grad(self.network, self.weights)(x, y, sens_tensor)
        if self.enable_clip_grad:
            clip_fn = F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE)
            grads = self.hyper_map(clip_fn, grads)

        grads = self.grad_reducer(grads)
        update_done = self.optimizer(grads)
        # Make the returned loss depend on the optimizer step.
        return F.depend(loss, update_done)

def _calculate_fan_in_and_fan_out(tensor):
    """
    Compute fan-in and fan-out from a weight shape.

    Args:
        tensor (Sequence[int]): Shape of the weight, laid out as
            (out_channels, in_channels, *kernel_dims). Despite the name, this
            is indexed as a shape, not as a tensor.

    Returns:
        tuple, (fan_in, fan_out). For a 2-D shape these are `tensor[1]` and
        `tensor[0]`; for higher ranks each is multiplied by the product of all
        kernel dimensions.

    Raises:
        ValueError: If the shape has fewer than 2 dimensions.
    """
    dimensions = len(tensor)
    if dimensions < 2:
        raise ValueError("Fan in and fan out can not be computed for tensor"
                         " with fewer than 2 dimensions")

    # Product of every dimension after the two channel dimensions. The loop
    # does not run for a 2-D shape, and it covers 1-D, 2-D and 3-D kernels
    # instead of assuming exactly two kernel dimensions.
    receptive_field_size = 1
    for kernel_dim in tensor[2:]:
        receptive_field_size *= kernel_dim

    fan_in = tensor[1] * receptive_field_size
    fan_out = tensor[0] * receptive_field_size
    return fan_in, fan_out

def init_weights(model):
    """
    Initialise the weights and biases of every nn.Conv2d cell in `model`.

    Weights of cells whose name contains "conv_list" or "header" are drawn
    from a normal distribution with std sqrt(1 / fan_in); all other conv
    weights use HeUniform. Biases are zero, except for cells whose name
    contains "classifier.header", which get -log((1 - 0.01) / 0.01).

    Args:
        model (Cell): Network whose `cells_and_names()` are visited. Cells are
            updated in place via `set_data`.
    """
    for name, cell in model.cells_and_names():
        if not isinstance(cell, nn.Conv2d):
            continue

        weight = cell.weight
        if "conv_list" in name or "header" in name:
            fan_in, _ = _calculate_fan_in_and_fan_out(weight.shape)
            sigma = math.sqrt(1. / float(fan_in))
            samples = np.random.normal(loc=0, scale=sigma, size=weight.shape).astype(np.float32)
            weight.set_data(weight_init.initializer(ms.Tensor(samples), weight.shape))
        else:
            he_uniform = weight_init.initializer(weight_init.HeUniform(), weight.shape, weight.dtype)
            weight.set_data(he_uniform)

        if cell.has_bias is not True:
            continue

        if "classifier.header" in name:
            # Equals logit(0.01): a sigmoid applied to this bias gives 0.01.
            bias_init = -np.log((1 - 0.01) / 0.01)
        else:
            bias_init = 'zeros'
        cell.bias.set_data(weight_init.initializer(bias_init, cell.bias.shape))


def bbox_transform(anchors, regression):
    """
    Decode regression offsets against anchors into corner-format boxes.

    Args:
        anchors (np.ndarray): Anchor boxes whose last axis is read as
            (y1, x1, y2, x2).
        regression (np.ndarray): Offsets whose last axis is read as
            (dy, dx, dh, dw). dy/dx are scaled by the anchor height/width and
            added to the anchor centre; dh/dw are exponentiated and multiplied
            by the anchor height/width.

    Returns:
        np.ndarray, decoded boxes with a new last axis ordered
        (xmin, ymin, xmax, ymax). Any number of leading dimensions is
        accepted, as long as the two inputs broadcast together.
    """
    y_centers_a = (anchors[..., 0] + anchors[..., 2]) / 2
    x_centers_a = (anchors[..., 1] + anchors[..., 3]) / 2
    ha = anchors[..., 2] - anchors[..., 0]
    wa = anchors[..., 3] - anchors[..., 1]

    w = np.exp(regression[..., 3]) * wa
    h = np.exp(regression[..., 2]) * ha

    y_centers = regression[..., 0] * ha + y_centers_a
    x_centers = regression[..., 1] * wa + x_centers_a

    ymin = y_centers - h / 2.
    xmin = x_centers - w / 2.
    ymax = y_centers + h / 2.
    xmax = x_centers + w / 2.

    # Stack on the last axis so unbatched (N, 4) input works as well as
    # batched (B, N, 4); for batched input this is the same as axis=2.
    return np.stack([xmin, ymin, xmax, ymax], axis=-1)


def clipBoxes(boxes, img):
    """
    Clamp box coordinates to the image area, modifying `boxes` in place.

    Args:
        boxes (np.ndarray): Array indexed as boxes[:, :, k]; columns 0 and 1
            are floored at 0, column 2 is capped at width - 1 and column 3 at
            height - 1.
        img: Object whose 4-element `shape` ends in (height, width).

    Returns:
        np.ndarray, the same `boxes` object after clamping.
    """
    _, _, height, width = img.shape

    # (column, lower bound, upper bound); None leaves that side unbounded.
    limits = (
        (0, 0, None),
        (1, 0, None),
        (2, None, width - 1),
        (3, None, height - 1),
    )
    for column, lower, upper in limits:
        boxes[:, :, column] = np.clip(boxes[:, :, column], a_min=lower, a_max=upper)

    return boxes

def invert_affine(metas: Union[float, list, tuple], preds):
    """
    Map predicted boxes from the resized frame back to original image size.

    Args:
        metas: Either a single scalar ratio applied to every coordinate of
            every prediction, or a sequence with one 6-tuple per prediction,
            (new_w, new_h, old_w, old_h, _, _), as produced by
            `aspectaware_resize_padding`.
        preds (list[dict]): Predictions; each dict holds a 'rois' array whose
            columns 0 and 2 are x coordinates and columns 1 and 3 are y
            coordinates. Entries with no boxes are skipped.

    Returns:
        list[dict], the same `preds` list with 'rois' rescaled in place.
    """
    # `metas is float` compared against the type object and was never true, so
    # a scalar ratio reached the per-image branch and failed on `metas[i]`.
    scalar_ratio = isinstance(metas, (int, float, np.number))

    for i, pred in enumerate(preds):
        rois = pred['rois']
        if rois.shape[0] == 0:
            continue

        if scalar_ratio:
            x_ratio = y_ratio = metas
        else:
            new_w, new_h, old_w, old_h, _, _ = metas[i]
            x_ratio = new_w / old_w
            y_ratio = new_h / old_h

        rois[:, [0, 2]] = rois[:, [0, 2]] / x_ratio
        rois[:, [1, 3]] = rois[:, [1, 3]] / y_ratio
    return preds

def aspectaware_resize_padding(image, width, height, interpolation=None, means=None):
    """
    Resize an image keeping its aspect ratio and pad it onto a fixed canvas.

    Landscape images (width > height) are scaled so their width equals
    `width`; all others are scaled so their height equals `height`. The
    result is placed in the top-left corner of a float32 canvas.

    Args:
        image (np.ndarray): Input image of shape (H, W, C).
        width (int): Canvas width.
        height (int): Canvas height.
        interpolation: Optional OpenCV interpolation flag passed to
            `cv2.resize`; when None the OpenCV default is used.
        means: Optional fill value(s) for the padded area; zeros when None.

    Returns:
        tuple, (canvas, new_w, new_h, old_w, old_h, padding_w, padding_h),
        where canvas has shape (height, width, C).
    """
    old_h, old_w, c = image.shape
    if old_w > old_h:
        new_w = width
        new_h = int(width / old_w * old_h)
    else:
        new_w = int(height / old_h * old_w)
        new_h = height

    # Rows are `height` and columns are `width`. Allocating (height, height)
    # only worked for square targets.
    canvas = np.zeros((height, width, c), np.float32)
    if means is not None:
        canvas[...] = means

    if new_w != old_w or new_h != old_h:
        if interpolation is None:
            image = cv2.resize(image, (new_w, new_h))
        else:
            image = cv2.resize(image, (new_w, new_h), interpolation=interpolation)

    if image.ndim == 2:
        # A single-channel image may come back 2-D after resizing; write it
        # into channel 0 of the canvas.
        canvas[:new_h, :new_w, 0] = image
    else:
        canvas[:new_h, :new_w] = image

    padding_h = height - new_h
    padding_w = width - new_w
    return canvas, new_w, new_h, old_w, old_h, padding_w, padding_h


def preprocess(image_path, max_size=512, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """
    Load one image and prepare it for inference.

    The image is read with `cv2.imread`, its channel axis is reversed, it is
    scaled to [0, 1], normalised with `mean` / `std`, and then resized and
    padded to a `max_size` x `max_size` frame.

    Args:
        image_path (str): Path handed to `cv2.imread`.
        max_size (int): Side length of the padded output frame.
        mean (tuple): Per-channel mean, applied after the channel reversal.
        std (tuple): Per-channel standard deviation, applied after the channel reversal.

    Returns:
        tuple, (ori_imgs, framed_imgs, framed_metas); three single-element
        lists holding the image as loaded, the padded frame, and the remaining
        values returned by `aspectaware_resize_padding`.
    """
    ori_imgs = [cv2.imread(image_path)]

    framed_imgs = []
    framed_metas = []
    for img in ori_imgs:
        normalized = (img[:, :, ::-1] / 255 - mean) / std
        resized = aspectaware_resize_padding(normalized, max_size, max_size, means=None)
        framed_imgs.append(resized[0])
        framed_metas.append(resized[1:])

    return ori_imgs, framed_imgs, framed_metas

def preprocess_video(*frame_from_video, max_size=512, mean=(0.406, 0.456, 0.485), std=(0.225, 0.224, 0.229)):
    """
    Prepare already-decoded video frames for inference.

    Each frame has its channel axis reversed, is scaled to [0, 1], normalised
    with `mean` / `std`, and then resized and padded to a
    `max_size` x `max_size` frame.

    Args:
        *frame_from_video (np.ndarray): Frames to process.
        max_size (int): Side length of the padded output frame.
        mean (tuple): Per-channel mean, applied after the channel reversal.
        std (tuple): Per-channel standard deviation, applied after the channel reversal.

    Returns:
        tuple, (ori_imgs, framed_imgs, framed_metas); the input frames as
        passed, the list of padded frames, and per frame the remaining values
        returned by `aspectaware_resize_padding`.
    """
    # NOTE(review): these default mean/std are in the reverse order of the
    # defaults in `preprocess`, although both functions reverse the channel
    # axis the same way - confirm which ordering is intended.
    ori_imgs = frame_from_video

    framed_imgs = []
    framed_metas = []
    for frame in ori_imgs:
        normalized = (frame[..., ::-1] / 255 - mean) / std
        resized = aspectaware_resize_padding(normalized, max_size, max_size, means=None)
        framed_imgs.append(resized[0])
        framed_metas.append(resized[1:])

    return ori_imgs, framed_imgs, framed_metas

def postprocess(x, anchors, regression, classification, threshold, iou_threshold):
    """
    Turn raw network outputs into per-image detections.

    Regression offsets are decoded against the anchors, clipped to the input
    image, filtered by score and reduced with class-aware NMS.

    Args:
        x: Network input; `x.shape[0]` gives the number of images and
            `x.asnumpy()` supplies the image shape used for clipping.
        anchors: Anchor boxes, exposing `asnumpy()`.
        regression: Box regression output, exposing `asnumpy()`.
        classification: Per-class scores, exposing `asnumpy()`; indexed as
            (image, anchor, class).
        threshold (float): An anchor is kept only if its best class score is
            strictly greater than this value.
        iou_threshold (float): IoU threshold passed to `batched_nms`.

    Returns:
        list[dict], one dict per image with keys 'rois', 'class_ids' and
        'scores'. All three are empty arrays when nothing survives.
    """
    boxes = bbox_transform(anchors.asnumpy(), regression.asnumpy())
    boxes = clipBoxes(boxes, x.asnumpy())

    classification = classification.asnumpy()

    # Best class score per anchor, and which anchors clear the threshold.
    scores = np.max(classification, axis=2, keepdims=True)
    scores_over_thresh = (scores > threshold)[:, :, 0]

    out = []
    for i in range(x.shape[0]):
        keep_mask = scores_over_thresh[i, :]

        # Default result, replaced below only when NMS keeps at least one box.
        detection = {
            'rois': np.array(()),
            'class_ids': np.array(()),
            'scores': np.array(()),
        }

        if scores_over_thresh[i].sum() != 0:
            classification_per = classification[i, keep_mask, :]
            boxes_per = boxes[i, keep_mask, :]
            scores_per = scores[i, keep_mask, :]

            # NMS filtering, done per predicted class.
            classes_ = np.argmax(classification_per, axis=1)
            scores_ = np.amax(classification_per, axis=1)
            anchors_nms_idx = batched_nms(boxes_per, scores_per[:, 0], classes_, iou_threshold=iou_threshold)

            if anchors_nms_idx.shape[0] != 0:
                detection = {
                    'rois': boxes_per[anchors_nms_idx, :],
                    'class_ids': classes_[anchors_nms_idx],
                    'scores': scores_[anchors_nms_idx],
                }

        out.append(detection)

    return out

def get_index_label(label, obj_list):
    """Return the integer position of `label` within `obj_list`."""
    return int(obj_list.index(label))

def plot_one_box(img, coord, label=None, score=None, color=None, line_thickness=None):
    """
    Draw one bounding box, and optionally a "label: score" banner, onto `img`.

    Args:
        img (np.ndarray): Image drawn on in place.
        coord (Sequence): Box corners; the first four entries are converted to
            int and used as (x1, y1, x2, y2).
        label (str): Text for the banner; no banner is drawn when falsy.
        score (float): Score formatted as a percentage next to the label. It
            is formatted whenever `label` is given.
        color: Colour passed to the OpenCV drawing calls.
        line_thickness (int): Box line thickness; when falsy it is derived
            from the larger image side.
    """
    # NOTE(review): for images whose larger side is under 500 px the derived
    # thickness rounds to 0, which also makes the font scale 0 - confirm
    # whether a minimum of 1 is wanted.
    tl = line_thickness or int(round(0.001 * max(img.shape[0:2])))
    top_left = (int(coord[0]), int(coord[1]))
    bottom_right = (int(coord[2]), int(coord[3]))
    cv2.rectangle(img, top_left, bottom_right, color, thickness=tl)
    if not label:
        return

    tf = max(tl - 2, 1)  # font thickness
    font_scale = float(tl) / 3
    s_size = cv2.getTextSize(str('{:.0%}'.format(score)), 0, fontScale=font_scale, thickness=tf)[0]
    t_size = cv2.getTextSize(label, 0, fontScale=font_scale, thickness=tf)[0]

    # Filled banner above the box, sized from the label and score text.
    banner_corner = top_left[0] + t_size[0] + s_size[0] + 15, top_left[1] - t_size[1] - 3
    cv2.rectangle(img, top_left, banner_corner, color, -1)

    # NOTE(review): lineType receives a font-face constant here; this looks
    # unintended - confirm which line type is wanted.
    cv2.putText(img, '{}: {:.0%}'.format(label, score), (top_left[0], top_left[1] - 2), 0, font_scale,
                [0, 0, 0], thickness=tf, lineType=cv2.FONT_HERSHEY_SIMPLEX)

def boolean_string(s):
    """
    Convert the exact strings 'True' / 'False' to the matching bool.

    Raises:
        ValueError: If `s` is neither 'True' nor 'False'.
    """
    accepted = {'False', 'True'}
    if s in accepted:
        return s == 'True'
    raise ValueError('Not a valid boolean string')