yangyuping
/
pointrendv1_modelarts

 
			
							# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Evaluation for MaskRcnn"""
import time
import os
import numpy as np
import mindspore
from mindspore import context, Tensor, ops, numpy
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from mindspore.common import set_seed
from pycocotools.coco import COCO
from pycocotools import mask as maskUtils
from maskrcnn.model_utils.config import config
from maskrcnn.model_utils.moxing_adapter import moxing_wrapper
from maskrcnn.model_utils.device_adapter import get_device_id, get_device_num
from maskrcnn_pointrend.src.maskrcnnPointRend_r50 import maskrcnn_r50_pointrend
from maskrcnn_pointrend.src.dataset import data_to_mindrecord_byte_image, create_maskrcnn_dataset
from maskrcnn_pointrend.src.util import coco_eval, output2json
from maskrcnn_pointrend.src.point_rend.sampling_points import GridSampler
from mindspore.ops import operations as P
set_seed(1)
BYTES_PER_FLOAT = 4
GPU_MEM_LIMIT = 1024 ** 3
dataset_id = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18,
              19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38,
              39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
              57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77,
              78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]

def _postprocess(instances, batched_inputs, img_metas):
    """
    Rescale the output instances to the target size.
    """
    processed_results = []

    num_img = batched_inputs.shape[0]
    for i in range(num_img):
        pred_boxes, pred_classes, pred_scores, pred_masks = instances
        results_per_image = (
            pred_boxes[i].asnumpy(), pred_classes[i].asnumpy(),
            pred_scores[i].asnumpy(), pred_masks[i].asnumpy())
        r = detector_postprocess(results_per_image, img_metas[i])
        if r:
            processed_results.append(r)
    return processed_results


def detector_postprocess(results, img_metas):
    """
    Resize the output instances.
    The input images are often resized when entering an object detector.
    As a result, we often need the outputs of the detector in a different
    resolution from its inputs.

    This function will resize the raw outputs of an R-CNN detector
    to produce outputs according to the desired output resolution.

    Args:
        results (Instances): the raw outputs from the detector.
            `results.image_size` contains the input image resolution the detector sees.
            This object might be modified in-place.
        output_height, output_width: the desired output resolution.

    Returns:
        Instances: the resized output from the model, based on the output resolution
    """
    output_height_tmp = img_metas[0]
    output_width_tmp = img_metas[1]
    new_size = (output_height_tmp, output_width_tmp)
    output_boxes = results[0]
    scale_h = img_metas[2]
    scale_w = img_metas[3]
    output_boxes[:, 0::2] /= scale_w
    output_boxes[:, 1::2] /= scale_h
    output_boxes = clip(output_boxes, img_metas[:-2])
    keep = nonempty(output_boxes)
    if not keep.any():
        print("no useful data")
        return []
    temp = []
    for item in results:
        item = item[keep]
        temp.append(item)
    results = temp
    pred_boxes = results[0]
    pred_masks = results[3]
    pred_masks = paste_masks_in_image(pred_masks[:, 0, :, :], pred_boxes, new_size)
    results[3] = pred_masks
    return results

def clip(output_boxes, box_size) -> None:
    """
    Clip (in place) the boxes by limiting x coordinates to the range [0, width]
    and y coordinates to the range [0, height].

    Args:
        box_size (height, width): The clipping box's size.
    """
    output_boxes = Tensor(output_boxes)
    h, w = box_size
    x1 = output_boxes[:, 0].clip(xmin=0, xmax=w)
    y1 = output_boxes[:, 1].clip(xmin=0, xmax=h)
    x2 = output_boxes[:, 2].clip(xmin=0, xmax=w)
    y2 = output_boxes[:, 3].clip(xmin=0, xmax=h)
    stack = ops.Stack(-1)
    output_boxes = stack((x1, y1, x2, y2))
    return output_boxes.asnumpy()

def nonempty(output_boxes, threshold=0.0):
    """
    Find boxes that are non-empty.
    A box is considered empty, if either of its side is no larger than threshold.

    Returns:
        Tensor:
            a binary vector which represents whether each box is empty
            (False) or non-empty (True).
    """
    widths = output_boxes[:, 2] - output_boxes[:, 0]
    heights = output_boxes[:, 3] - output_boxes[:, 1]
    keep = (widths > threshold) & (heights > threshold)
    return keep

def paste_masks_in_image(masks, boxes, image_shape, threshold=0.5):
    """
    Paste a set of masks that are of a fixed resolution (e.g., 28 x 28) into an image.
    The location, height, and width for pasting each mask is determined by their
    corresponding bounding boxes in boxes.

    Note:
        This is a complicated but more accurate implementation. In actual deployment, it is
        often enough to use a faster but less accurate implementation.
        See :func:`paste_mask_in_image_old` in this file for an alternative implementation.

    Args:
        masks (tensor): Tensor of shape (Bimg, Hmask, Wmask), where Bimg is the number of
            detected object instances in the image and Hmask, Wmask are the mask width and mask
            height of the predicted mask (e.g., Hmask = Wmask = 28). Values are in [0, 1].
        boxes (Boxes or Tensor): A Boxes of length Bimg or Tensor of shape (Bimg, 4).
            boxes[i] and masks[i] correspond to the same object instance.
        image_shape (tuple): height, width
        threshold (float): A threshold in [0, 1] for converting the (soft) masks to
            binary masks.

    Returns:
        img_masks (Tensor): A tensor of shape (Bimg, Himage, Wimage), where Bimg is the
        number of detected object instances and Himage, Wimage are the image width
        and height. img_masks[i] is a binary mask for object instance i.
    """
    assert masks.shape[-1] == masks.shape[-2], "Only square mask predictions are supported"
    N = len(masks)
    assert len(boxes) == N, boxes.shape

    img_h, img_w = image_shape
    num_chunks = int(np.ceil(N * int(img_h) * int(img_w) * BYTES_PER_FLOAT / GPU_MEM_LIMIT))
    assert (
        num_chunks <= N
    ), "Default GPU_MEM_LIMIT in mask_ops.py is too small; try increasing it"
    split = ops.Split(0, num_chunks)
    chunks = split(numpy.arange(N))

    img_masks = numpy.zeros((N, int(img_h), int(img_w)), mindspore.bool_).asnumpy()
    for inds in chunks:
        inds = inds.asnumpy()
        masks_chunk, spatial_inds = _do_paste_mask(
            masks[inds, None, :, :], boxes[inds], int(img_h), int(img_w)
        )

        if threshold >= 0:
            masks_chunk = masks_chunk >= threshold
        else:
            masks_chunk = (masks_chunk * 255)
        img_masks[(inds,) + spatial_inds] = masks_chunk
    return img_masks

def _do_paste_mask(masks, boxes, img_h: int, img_w: int):
    """
    Args:
        masks: N, 1, H, W
        boxes: N, 4
        img_h, img_w (int):
        skip_empty (bool): only paste masks within the region that
            tightly bound all boxes, and returns the results this region only.
            An important optimization for CPU.

    Returns:
        if skip_empty == False, a mask of shape (N, img_h, img_w)
        if skip_empty == True, a mask of shape (N, h', w'), and the slice
            object for the corresponding region.
    """

    x0_int, y0_int = 0, 0
    x1_int, y1_int = img_w, img_h
    split = ops.Split(1, 4)
    x0, y0, x1, y1 = split(Tensor(boxes))

    N = masks.shape[0]

    img_y = numpy.arange(y0_int, y1_int, dtype=mindspore.float32) + 0.5
    img_x = numpy.arange(x0_int, x1_int, dtype=mindspore.float32) + 0.5
    img_y = (img_y - y0) / (y1 - y0) * 2 - 1
    img_x = (img_x - x0) / (x1 - x0) * 2 - 1

    broadcast_to = ops.BroadcastTo((N, img_y.shape[1], img_x.shape[1]))
    stack = ops.Stack(3)
    gx = broadcast_to(img_x[:, None, :])
    gy = broadcast_to(img_y[:, :, None])
    grid = stack([gx, gy])
    gridSample = GridSampler(align_corners=False)
    img_masks = gridSample(Tensor(masks), grid)
    img_masks = img_masks[:, 0]
    logic_and = ops.LogicalAnd()
    mask_x = logic_and(grid[..., 0] >= -1, grid[..., 0] <= 1)
    mask_y = logic_and(grid[..., 1] >= -1, grid[..., 1] <= 1)
    mask_xy = logic_and(mask_x, mask_y).astype(mindspore.float32)
    img_masks = img_masks * mask_xy
    return img_masks.asnumpy(), ()

def maskrcnn_eval(dataset_path, ckpt_path, ann_file):
    """MaskRcnn evaluation."""
    print('\nconfig:\n', config)
    ds = create_maskrcnn_dataset(dataset_path, batch_size=config.test_batch_size, is_training=False)
    net = maskrcnn_r50_pointrend(config)
    for item1 in net.parameters_and_names():
        print(item1)
        break
    param_dict = load_checkpoint(ckpt_path)
    load_param_into_net(net, param_dict)

    for item in net.parameters_and_names():
        print(item)
        break
    net.set_train(False)

    eval_iter = 0
    total = ds.get_dataset_size()
    outputs = []
    dataset_coco = COCO(ann_file)

    print("\n========================================\n")
    print("total images num: ", total)
    print("Processing, please wait a moment.")

    for data in ds.create_dict_iterator(output_numpy=True, num_epochs=1):

        img_id = data['image_id'][0].tolist()
        img_data = data['image']
        img_metas = data['image_shape']
        gt_bboxes = data['box']
        gt_labels = data['label']
        gt_num = data['valid_num']
        gt_mask = data["mask"]

        start = time.time()
        output = net(Tensor(img_data), Tensor(img_metas), Tensor(gt_bboxes), Tensor(gt_labels), Tensor(gt_num),
                     Tensor(gt_mask))
        end = time.time()
        print("Iter {} cost time {}".format(eval_iter, end - start))
        del gt_bboxes
        del gt_labels
        del gt_mask
        del gt_num
        if not output:
            continue
        processed_results = _postprocess(output, img_data, img_metas)
        del img_data
        if not processed_results:
            continue
        result = process(processed_results, img_id)
        outputs.append(result)
        eval_iter = eval_iter + 1
    if not outputs:
        return
    eval_types = ["bbox", "segm"]
    result_files = output2json(outputs, "./results.pkl", dataset_id)
    coco_eval(result_files, eval_types, dataset_coco, single_result=False)


def process(outputs, img_id):
    """
    Args:
        inputs: the inputs to a COCO model (e.g., GeneralizedRCNN).
            It is a list of dict. Each dict corresponds to an image and
            contains keys like "height", "width", "file_name", "image_id".
        outputs: the outputs of a COCO model. It is a list of dicts with key
            "instances" that contains :class:`Instances`.
    """
    predictions = []
    for output in outputs:
        segm_json_results, box_json_results = instances_to_coco_json(output, img_id)
        if segm_json_results:
            predictions.append((segm_json_results, box_json_results))
    return predictions

def instances_to_coco_json(instances, img_id):
    """
    Dump an "Instances" object to a COCO-format json that's used for evaluation.

    Args:
        instances (Instances):
        img_id (int): the image id

    Returns:
        list[dict]: list of json annotations in COCO format.
    """
    num_instance = len(instances[0])
    if num_instance == 0:
        return []
    boxes = instances[0]
    boxes[:, 2] -= boxes[:, 0] + 1
    boxes[:, 3] -= boxes[:, 1] + 1
    classes = instances[1]
    scores = instances[2]
    pred_masks = instances[3]
    classes = classes.tolist()
    scores = scores.tolist()
    boxes = boxes.tolist()
    rles = [
        maskUtils.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
        for mask in pred_masks
    ]
    for rle in rles:
        # "counts" is an array encoded by mask_util as a byte-stream. Python3's
        # json writer which always produces strings cannot serialize a bytestream
        # unless you decode it. Thankfully, utf-8 works out (which is also what
        # the pycocotools/_mask.pyx does).
        rle["counts"] = rle["counts"].decode("utf-8")

    segm_json_results = []
    box_json_results = []
    for k in range(num_instance):
        seg_result = {
            "image_id": img_id,
            "category_id": classes[k],
            "score": scores[k],
        }
        box_result = {
            "image_id": img_id,
            "category_id": classes[k],
            "bbox": boxes[k],
            "score": scores[k],
        }
        box_json_results.append(box_result)
        seg_result["segmentation"] = rles[k]
        segm_json_results.append(seg_result)
    return segm_json_results, box_json_results

def modelarts_process():
    """ modelarts process """

    def unzip(zip_file, save_dir):
        import zipfile
        s_time = time.time()
        if not os.path.exists(os.path.join(save_dir, config.modelarts_dataset_unzip_name)):
            zip_isexist = zipfile.is_zipfile(zip_file)
            if zip_isexist:
                fz = zipfile.ZipFile(zip_file, 'r')
                data_num = len(fz.namelist())
                print("Extract Start...")
                print("unzip file num: {}".format(data_num))
                data_print = int(data_num / 100) if data_num > 100 else 1
                i = 0
                for file in fz.namelist():
                    if i % data_print == 0:
                        print("unzip percent: {}%".format(int(i * 100 / data_num)), flush=True)
                    i += 1
                    fz.extract(file, save_dir)
                print("cost time: {}min:{}s.".format(int((time.time() - s_time) / 60), \
                                                     int(int(time.time() - s_time) % 60)))
                print("Extract Done.")
            else:
                print("This is not zip.")
        else:
            print("Zip has been extracted.")

    if config.need_modelarts_dataset_unzip:
        zip_file_1 = os.path.join(config.data_path, config.modelarts_dataset_unzip_name + ".zip")
        save_dir_1 = os.path.join(config.data_path)

        sync_lock = "/tmp/unzip_sync.lock"
        if get_device_id() % min(get_device_num(), 8) == 0 and not os.path.exists(sync_lock):
            print("Zip file path: ", zip_file_1)
            print("Unzip file save dir: ", save_dir_1)
            unzip(zip_file_1, save_dir_1)
            print("===Finish extract data synchronization===")
            try:
                os.mknod(sync_lock)
            except IOError:
                pass

        while True:
            if os.path.exists(sync_lock):
                break
            time.sleep(1)

        print("Device: {}, Finish sync unzip data from {} to {}.".format(get_device_id(), zip_file_1, save_dir_1))
        print("#" * 200, os.listdir(save_dir_1))
        print("#" * 200, os.listdir(os.path.join(config.data_path, config.modelarts_dataset_unzip_name)))

        config.coco_root = os.path.join(config.data_path, config.modelarts_dataset_unzip_name)
    config.checkpoint_path = os.path.join(config.output_path, config.checkpoint_path)
    config.ann_file = os.path.join(config.coco_root, config.ann_file)


@moxing_wrapper(pre_process=modelarts_process)
def eval_():
    device_target = config.device_target
    context.set_context(mode=context.PYNATIVE_MODE, device_target=device_target)

    if config.device_target == "Ascend":
        context.set_context(device_id=config.device_id)
    else:
        context.set_context(device_id=config.device_id)

    prefix = "MaskRcnn_eval.mindrecord"
    mindrecord_dir = config.mindrecord_dir
    mindrecord_file = os.path.join(mindrecord_dir, prefix)
    if not os.path.exists(mindrecord_file):
        if not os.path.isdir(mindrecord_dir):
            os.makedirs(mindrecord_dir)
        if config.dataset == "coco":
            if os.path.isdir(config.coco_root):
                print("Create Mindrecord.")
                data_to_mindrecord_byte_image("coco", False, prefix, file_num=1)
                print("Create Mindrecord Done, at {}".format(mindrecord_dir))
            else:
                print("coco_root not exits.")
        else:
            if os.path.isdir(config.IMAGE_DIR) and os.path.exists(config.ANNO_PATH):
                print("Create Mindrecord.")
                data_to_mindrecord_byte_image("other", False, prefix, file_num=1)
                print("Create Mindrecord Done, at {}".format(mindrecord_dir))
            else:
                print("IMAGE_DIR or ANNO_PATH not exits.")

    print("Start Eval!")
    ann_file = config.coco_root + "/annotations/instances_val2017.json"
    maskrcnn_eval(mindrecord_file, config.checkpoint_path, ann_file)


if __name__ == '__main__':
    eval_()