# OpenModelZoo / FastSCNN
'''
Script that runs SDK inference for FastSCNN.
'''
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

import argparse
import os
import time
import numpy as np
import PIL.Image as Image
from tabulate import tabulate

import MxpiDataType_pb2 as MxpiDataType
from StreamManagerApi import StreamManagerApi, InProtobufVector, \
    MxProtobufIn, StringVector

def parse_args():
    """Declare the command-line options of the FastSCNN SDK run and parse them.

    Returns:
        argparse.Namespace: the parsed options (pipeline, image_path,
        image_width, image_height, save_mask, mask_result_path).
    """
    cli = argparse.ArgumentParser(description="FastSCNN process")
    # One (flag, keyword-options) row per option, in --help order.
    option_table = (
        ("--pipeline", dict(type=str, default=None, help="SDK infer pipeline")),
        ("--image_path", dict(type=str, default=None, help="root path of image")),
        ('--image_width', dict(default=768, type=int, help='image width')),
        ('--image_height', dict(default=768, type=int, help='image height')),
        ('--save_mask', dict(default=1, type=int, help='0 for False, 1 for True')),
        ('--mask_result_path', dict(default='./mask_result', type=str,
                                    help='the folder to save the semantic mask images')),
    )
    for flag, options in option_table:
        cli.add_argument(flag, **options)
    return cli.parse_args()

def send_source_data(appsrc_id, tensor, stream_name, stream_manager):
    """
    Wrap a numpy tensor in an MxpiTensorPackageList message and push it
    to the given stream through the appsrc plugin numbered `appsrc_id`.

    Args:
        appsrc_id: index of the appsrc element; also used to build the
            protobuf key "appsrc<id>".
        tensor: array exposing `tobytes()` and `shape`.
        stream_name: name of the target stream, passed to SendProtobuf.
        stream_manager: object providing `SendProtobuf`.

    Returns:
        bool: True when SendProtobuf returns a non-negative code, else False.
    """
    payload = tensor.tobytes()

    # Describe the tensor: one package holding one tensor entry.
    package_list = MxpiDataType.MxpiTensorPackageList()
    tensor_msg = package_list.tensorPackageVec.add().tensorVec.add()
    tensor_msg.deviceId = 0
    tensor_msg.memType = 0
    for dim in tensor.shape:
        tensor_msg.tensorShape.append(dim)
    tensor_msg.dataStr = payload
    tensor_msg.tensorDataSize = len(payload)

    # Serialize the package list into the protobuf input expected by the stream.
    proto_in = MxProtobufIn()
    proto_in.key = "appsrc{}".format(appsrc_id).encode('utf-8')
    proto_in.type = b'MxTools.MxpiTensorPackageList'
    proto_in.protobuf = package_list.SerializeToString()

    proto_vec = InProtobufVector()
    proto_vec.push_back(proto_in)

    if stream_manager.SendProtobuf(stream_name, appsrc_id, proto_vec) < 0:
        print("Failed to send data to stream.")
        return False
    return True

# Flat colour palette: 19 consecutive (R, G, B) triplets, one per entry of
# `classes` below. It is handed to PIL's `putpalette` when masks are saved.
cityspallete = [
    128, 64, 128,
    244, 35, 232,
    70, 70, 70,
    102, 102, 156,
    190, 153, 153,
    153, 153, 153,
    250, 170, 30,
    220, 220, 0,
    107, 142, 35,
    152, 251, 152,
    0, 130, 180,
    220, 20, 60,
    255, 0, 0,
    0, 0, 142,
    0, 0, 70,
    0, 60, 100,
    0, 80, 100,
    0, 0, 230,
    119, 11, 32,
]
# Human-readable names of the 19 evaluated classes, ordered by class index.
classes = ('road', 'sidewalk', 'building', 'wall', 'fence', 'pole', 'traffic light',
           'traffic sign', 'vegetation', 'terrain', 'sky', 'person', 'rider', 'car',
           'truck', 'bus', 'train', 'motorcycle', 'bicycle')

# Raw label ids that `_key` maps to a class index (0..18), in class order.
# NOTE(review): not referenced elsewhere in the visible part of this file.
valid_classes = [7, 8, 11, 12, 13, 17, 19, 20, 21, 22,
                 23, 24, 25, 26, 27, 28, 31, 32, 33]

# Lookup table from raw label id to class index: entry `i` belongs to raw id
# `i - 1` (so the first entry is for raw id -1). A value of -1 marks a raw id
# that has no evaluated class.
_key = np.array([-1, -1, -1, -1, -1, -1,
                 -1, -1, 0, 1, -1, -1,
                 2, 3, 4, -1, -1, -1,
                 5, -1, 6, 7, 8, 9,
                 10, 11, 12, 13, 14, 15,
                 -1, -1, 16, 17, 18])
# Sorted list of every accepted raw label id (-1 .. len(_key) - 2); used both to
# validate masks and as the bin edges that turn a raw id into an index of `_key`.
_mapping = np.array(range(-1, len(_key) - 1)).astype('int32')

def _get_city_pairs(folder, split='train'):
    '''_get_city_pairs'''
    def get_path_pairs(img_folder, mask_folder):
        img_paths = []
        mask_paths = []
        for root, _, files in os.walk(img_folder):
            for filename in files:
                if filename.startswith('._'):
                    continue
                if filename.endswith('.png'):
                    imgpath = os.path.join(root, filename)
                    foldername = os.path.basename(os.path.dirname(imgpath))
                    maskname = filename.replace('leftImg8bit', 'gtFine_labelIds')
                    maskpath = os.path.join(mask_folder, foldername, maskname)
                    if os.path.isfile(imgpath) and os.path.isfile(maskpath):
                        img_paths.append(imgpath)
                        mask_paths.append(maskpath)
                    else:
                        print('cannot find the mask or image:', imgpath, maskpath)
        print('Found {} images in the folder {}'.format(len(img_paths), img_folder))
        return img_paths, mask_paths

    if split in ('train', 'val'):
        img_folder = os.path.join(folder, 'leftImg8bit' + os.sep + split)
        mask_folder = os.path.join(folder, 'gtFine' + os.sep + split)
        img_paths, mask_paths = get_path_pairs(img_folder, mask_folder)
        return img_paths, mask_paths
    assert split == 'trainval'
    print('trainval set')
    train_img_folder = os.path.join(folder, 'leftImg8bit' + os.sep + 'train')
    train_mask_folder = os.path.join(folder, 'gtFine' + os.sep + 'train')
    val_img_folder = os.path.join(folder, 'leftImg8bit' + os.sep + 'val')
    val_mask_folder = os.path.join(folder, 'gtFine' + os.sep + 'val')
    train_img_paths, train_mask_paths = get_path_pairs(train_img_folder, train_mask_folder)
    val_img_paths, val_mask_paths = get_path_pairs(val_img_folder, val_mask_folder)
    img_paths = train_img_paths + val_img_paths
    mask_paths = train_mask_paths + val_mask_paths
    return img_paths, mask_paths

def _val_sync_transform(outsize, img, mask):
    '''Resize and centre-crop an image / mask pair identically.

    The shorter image side is scaled to min(outsize) keeping the aspect
    ratio (bilinear for the image, nearest for the mask), then both are
    cropped around the centre to outsize[1] x outsize[0] (width x height).

    Returns the cropped image as a numpy array and the cropped mask after
    `_mask_transform`.
    '''
    crop_h, crop_w = outsize[0], outsize[1]
    short_side = min(outsize)

    # scale so that the shorter side equals short_side
    src_w, src_h = img.size
    if src_w > src_h:
        new_h = short_side
        new_w = int(1.0 * src_w * new_h / src_h)
    else:
        new_w = short_side
        new_h = int(1.0 * src_h * new_w / src_w)
    scaled_size = (new_w, new_h)
    img = img.resize(scaled_size, Image.BILINEAR)
    mask = mask.resize(scaled_size, Image.NEAREST)

    # centre crop box shared by image and mask
    cur_w, cur_h = img.size
    left = int(round((cur_w - crop_w) / 2.))
    top = int(round((cur_h - crop_h) / 2.))
    box = (left, top, left + crop_w, top + crop_h)

    return np.array(img.crop(box)), _mask_transform(mask.crop(box))

def _class_to_index(mask):
    '''Translate raw label ids in `mask` to class indices through `_key`.

    Every distinct value of `mask` must occur in `_mapping`, otherwise an
    AssertionError is raised. The result has the same shape as `mask`.
    '''
    assert all(raw_id in _mapping for raw_id in np.unique(mask))
    # position of each raw id inside the sorted `_mapping` == its row in `_key`
    key_positions = np.digitize(mask.ravel(), _mapping, right=True)
    return _key[key_positions].reshape(mask.shape)

def _mask_transform(mask):
    '''Convert a label mask to an int32 array of class indices.'''
    raw_ids = np.array(mask).astype('int32')
    class_ids = _class_to_index(raw_ids)
    return np.array(class_ids).astype('int32')
class SegmentationMetric:
    """Running pixel-accuracy (pixAcc) and mean-IoU (mIoU) accumulator."""

    def __init__(self, nclass):
        super().__init__()
        self.nclass = nclass
        self.reset()

    def reset(self):
        """Clear every accumulated counter."""
        self.total_correct = 0
        self.total_label = 0
        self.total_inter = np.zeros(self.nclass)
        self.total_union = np.zeros(self.nclass)

    def update(self, preds, labels):
        """Add one batch to the running totals.

        Parameters
        ----------
        preds : numpy array
            Predicted scores, forwarded to `batch_pix_accuracy` and
            `batch_intersection_union`.
        labels : numpy array
            Ground-truth labels, forwarded to the same two helpers.
        """
        correct, labeled = batch_pix_accuracy(preds, labels)
        inter, union = batch_intersection_union(preds, labels, self.nclass)
        self.total_correct += correct
        self.total_label += labeled
        self.total_inter += inter
        self.total_union += union

    def get(self, return_category_iou=False):
        """Return the metrics accumulated so far.

        Returns
        -------
        (pixAcc, mIoU), or (pixAcc, mIoU, per-class IoU array) when
        `return_category_iou` is true.
        """
        # double-precision machine epsilon; keeps the divisions defined when
        # nothing has been accumulated yet
        eps = 2.220446049250313e-16
        pix_acc = 1.0 * self.total_correct / (eps + self.total_label)
        per_class_iou = 1.0 * self.total_inter / (eps + self.total_union)
        mean_iou = per_class_iou.mean().item()
        if not return_category_iou:
            return pix_acc, mean_iou
        return pix_acc, mean_iou, per_class_iou

def batch_pix_accuracy(output, target):
    """Count correctly classified and labeled pixels of one batch (PixAcc).

    Args:
        output: numpy array of per-class scores with the class axis at
            position 1 (e.g. N x C x H x W).
        target: numpy array of class indices shaped like `output` without
            the class axis; -1 marks pixels that must be ignored.

    Returns:
        tuple: (number of correct pixels, number of labeled pixels).
    """
    # Take the argmax on the scores as they are: casting them to an integer
    # type first would truncate e.g. 0.2 and 0.9 to the same value and make
    # the argmax fall back to the first class.
    predict = np.argmax(np.asarray(output), 1) + 1
    # shift labels by one so that the ignore label (-1) becomes 0
    target = target.astype(np.int64) + 1
    labeled = target > 0
    pixel_labeled = labeled.sum()
    pixel_correct = ((predict == target) & labeled).sum()
    assert pixel_correct <= pixel_labeled, "Correct area should be smaller than Labeled"
    return pixel_correct, pixel_labeled

def batch_intersection_union(output, target, nclass):
    """Per-class intersection and union areas of one batch (for mIoU).

    Args:
        output: numpy array of per-class scores, class axis at position 1.
        target: numpy array of class indices; -1 marks ignored pixels.
        nclass: number of classes, used as the number of histogram bins.

    Returns:
        tuple: (intersection areas, union areas) as float32 arrays.
    """
    # Classes are shifted to 1..nclass; the value 0 lies outside the histogram
    # range, so ignored or mismatching pixels are not counted.
    hist_options = dict(bins=nclass, range=(1, nclass))

    shifted_target = target.astype(np.float32) + 1
    keep = (shifted_target > 0).astype(np.float32)

    shifted_pred = np.argmax(output.astype(np.float32), 1) + 1
    shifted_pred = shifted_pred.astype(np.float32) * keep
    hits = shifted_pred * (shifted_pred == shifted_target).astype(np.float32)

    area_inter = np.histogram(hits, **hist_options)[0]
    area_pred = np.histogram(shifted_pred, **hist_options)[0]
    area_lab = np.histogram(shifted_target, **hist_options)[0]
    area_union = area_pred + area_lab - area_inter
    assert (area_inter > area_union).sum() == 0, "Intersection area should be smaller than Union area"
    return area_inter.astype(np.float32), area_union.astype(np.float32)

def _run_evaluation(args, stream_manager_api):
    """Send every validation image through the stream and report pixAcc / mIoU.

    Returns early (after printing the reason) when sending data or fetching
    the inference result fails; in that case no summary is written.
    """
    stream_name = b'fastscnn'
    num_classes = len(classes)
    infer_total_time = 0
    assert os.path.exists(args.image_path), "Please put dataset in " + str(args.image_path)
    images, mask_paths = _get_city_pairs(args.image_path, 'val')
    assert len(images) == len(mask_paths)
    if not images:
        raise RuntimeError("Found 0 images in subfolders of:" + args.image_path + "\n")

    # eval_results.txt is always written into this folder, so it has to exist
    # even when saving the mask images is switched off.
    os.makedirs(args.mask_result_path, exist_ok=True)

    metric = SegmentationMetric(num_classes)
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    for index, (image_path, mask_path) in enumerate(zip(images, mask_paths)):
        image_name = image_path.split(os.sep)[-1].split(".")[0]  # get the name of image file
        print("Processing ---> ", image_name)
        img = Image.open(image_path).convert('RGB')
        mask = Image.open(mask_path)
        img, mask = _val_sync_transform((args.image_height, args.image_width), img, mask)

        img = img.astype(np.float32)
        mask = mask.astype(np.int32)
        img = img.transpose((2, 0, 1))  # HWC -> CHW
        for channel, _ in enumerate(img):
            # Normalization
            img[channel] /= 255
            img[channel] -= mean[channel]
            img[channel] /= std[channel]

        img = np.expand_dims(img, 0)  # NCHW
        mask = np.expand_dims(mask, 0)  # NHW

        if not send_source_data(0, img, stream_name, stream_manager_api):
            return
        # Obtain the inference result by specifying streamName and uniqueId.
        key_vec = StringVector()
        key_vec.push_back(b'modelInfer')
        start_time = time.time()
        infer_result = stream_manager_api.GetProtobuf(stream_name, 0, key_vec)
        infer_total_time += time.time() - start_time
        if infer_result.size() == 0:
            print("inferResult is null")
            return
        if infer_result[0].errorCode != 0:
            print("GetProtobuf error. errorCode=%d" % (infer_result[0].errorCode))
            return
        result = MxpiDataType.MxpiTensorPackageList()
        result.ParseFromString(infer_result[0].messageBuf)
        res = np.frombuffer(result.tensorPackageVec[0].tensorVec[0].dataStr, dtype='<f4')
        mask_image = res.reshape(1, num_classes, args.image_height, args.image_width)

        metric.update(mask_image, mask)
        pixAcc, mIoU = metric.get()
        print("[EVAL] Sample: {:d}, pixAcc: {:.3f}, mIoU: {:.3f}".format(index + 1, pixAcc * 100, mIoU * 100))
        if args.save_mask:
            output = np.argmax(mask_image[0], axis=0)
            out_img = Image.fromarray(output.astype('uint8'))
            out_img.putpalette(cityspallete)
            outname = str(image_name) + '.png'
            out_img.save(os.path.join(args.mask_result_path, outname))

    pixAcc, mIoU, category_iou = metric.get(return_category_iou=True)
    print('End validation pixAcc: {:.3f}, mIoU: {:.3f}'.format(pixAcc * 100, mIoU * 100))
    txtName = os.path.join(args.mask_result_path, "eval_results.txt")
    with open(txtName, "w") as f:
        f.write('validation pixAcc:' + str(pixAcc * 100) + ', mIoU:' + str(mIoU * 100))
        f.write('\n')
        for i, cls_name in enumerate(classes):
            f.write('class name: ' + cls_name + ' iou: ' + str(category_iou[i]) + '\n')
    headers = ['class id', 'class name', 'iou']
    table = [[cls_name, category_iou[i]] for i, cls_name in enumerate(classes)]
    print('Category iou: \n {}'.format(tabulate(table, headers, \
                           tablefmt='grid', showindex="always", numalign='center', stralign='center')))
    print("Testing finished....")
    print("=======================================")
    print("The total time of inference is {} s".format(infer_total_time))
    print("=======================================")


def main():
    """
    Read the pipeline, run inference on the validation images and evaluate.

    The stream manager is initialised first; once that succeeded, the streams
    are destroyed on every exit path (normal end, early return or exception).
    """

    args = parse_args()

    # init stream manager
    stream_manager_api = StreamManagerApi()
    ret = stream_manager_api.InitManager()
    if ret != 0:
        print("Failed to init Stream manager, ret=%s" % str(ret))
        return

    try:
        # create streams by pipeline config file
        with open(os.path.realpath(args.pipeline), 'rb') as f:
            pipeline_str = f.read()
        ret = stream_manager_api.CreateMultipleStreams(pipeline_str)
        if ret != 0:
            print("Failed to create Stream, ret=%s" % str(ret))
            return

        _run_evaluation(args, stream_manager_api)
    finally:
        # destroy streams
        stream_manager_api.DestroyAllStreams()

if __name__ == '__main__':
    main()