# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np

from mmseg.core.evaluation import (eval_metrics, mean_dice, mean_fscore,
                                   mean_iou)
from mmseg.core.evaluation.metrics import f_score


def get_confusion_matrix(pred_label, label, num_classes, ignore_index):
    """Calculate the confusion matrix between prediction and ground truth.

    Args:
        pred_label (np.ndarray): 2D prediction map.
        label (np.ndarray): 2D ground truth label map.
        num_classes (int): Number of categories.
        ignore_index (int): Index to be ignored in evaluation.
    """

    mask = (label != ignore_index)
    pred_label = pred_label[mask]
    label = label[mask]

    n = num_classes
    inds = n * label + pred_label

    mat = np.bincount(inds, minlength=n**2).reshape(n, n)

    return mat


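# A minimal worked example of the bincount trick above (the helper name is
# ours, not part of the original suite, and pytest will not collect it):
# with ``n`` classes, the flat index ``n * label + pred_label`` maps every
# (ground truth, prediction) pair to a unique bin, so reshaping the bin
# counts to (n, n) yields a confusion matrix with ground truth on the rows
# and predictions on the columns.
def _confusion_matrix_sketch():
    pred = np.array([0, 1, 1, 2])
    gt = np.array([0, 1, 2, 2])
    mat = get_confusion_matrix(pred, gt, num_classes=3, ignore_index=255)
    # Expected layout (rows: ground truth, columns: prediction):
    # [[1, 0, 0],
    #  [0, 1, 0],
    #  [0, 1, 1]]
    assert mat[2, 1] == 1 and mat[2, 2] == 1
    assert mat.sum() == 4

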
# This func is deprecated since it's not memory efficient
def legacy_mean_iou(results, gt_seg_maps, num_classes, ignore_index):
    num_imgs = len(results)
    assert len(gt_seg_maps) == num_imgs
    total_mat = np.zeros((num_classes, num_classes), dtype=np.float32)
    for i in range(num_imgs):
        mat = get_confusion_matrix(
            results[i], gt_seg_maps[i], num_classes, ignore_index=ignore_index)
        total_mat += mat
    all_acc = np.diag(total_mat).sum() / total_mat.sum()
    acc = np.diag(total_mat) / total_mat.sum(axis=1)
    iou = np.diag(total_mat) / (
        total_mat.sum(axis=1) + total_mat.sum(axis=0) - np.diag(total_mat))

    return all_acc, acc, iou


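# A small hand-checked sketch of the per-class IoU formula used above:
# IoU = TP / (TP + FN + FP) = diag / (row_sum + col_sum - diag) on the
# accumulated confusion matrix. The helper name is illustrative only and is
# not collected by pytest.
def _iou_from_confusion_sketch():
    mat = np.array([[3., 1.], [2., 4.]])
    diag = np.diag(mat)
    iou = diag / (mat.sum(axis=1) + mat.sum(axis=0) - diag)
    # class 0: 3 / (4 + 5 - 3) = 0.5, class 1: 4 / (6 + 5 - 4) = 4 / 7
    assert np.allclose(iou, [0.5, 4 / 7])

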
# This func is deprecated since it's not memory efficient
def legacy_mean_dice(results, gt_seg_maps, num_classes, ignore_index):
    num_imgs = len(results)
    assert len(gt_seg_maps) == num_imgs
    total_mat = np.zeros((num_classes, num_classes), dtype=np.float32)
    for i in range(num_imgs):
        mat = get_confusion_matrix(
            results[i], gt_seg_maps[i], num_classes, ignore_index=ignore_index)
        total_mat += mat
    all_acc = np.diag(total_mat).sum() / total_mat.sum()
    acc = np.diag(total_mat) / total_mat.sum(axis=1)
    dice = 2 * np.diag(total_mat) / (
        total_mat.sum(axis=1) + total_mat.sum(axis=0))

    return all_acc, acc, dice


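# A companion sketch (illustrative helper, not collected by pytest) of the
# Dice formula above: Dice = 2 * TP / (2 * TP + FP + FN)
# = 2 * diag / (row_sum + col_sum), which per class also satisfies
# Dice = 2 * IoU / (1 + IoU).
def _dice_from_confusion_sketch():
    mat = np.array([[3., 1.], [2., 4.]])
    diag = np.diag(mat)
    iou = diag / (mat.sum(axis=1) + mat.sum(axis=0) - diag)
    dice = 2 * diag / (mat.sum(axis=1) + mat.sum(axis=0))
    assert np.allclose(dice, 2 * iou / (1 + iou))

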
# This func is deprecated since it's not memory efficient
def legacy_mean_fscore(results,
                       gt_seg_maps,
                       num_classes,
                       ignore_index,
                       beta=1):
    num_imgs = len(results)
    assert len(gt_seg_maps) == num_imgs
    total_mat = np.zeros((num_classes, num_classes), dtype=np.float32)
    for i in range(num_imgs):
        mat = get_confusion_matrix(
            results[i], gt_seg_maps[i], num_classes, ignore_index=ignore_index)
        total_mat += mat
    all_acc = np.diag(total_mat).sum() / total_mat.sum()
    recall = np.diag(total_mat) / total_mat.sum(axis=1)
    precision = np.diag(total_mat) / total_mat.sum(axis=0)
    fv = np.vectorize(f_score)
    fscore = fv(precision, recall, beta=beta)

    return all_acc, recall, precision, fscore


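# A hedged sketch of the F-beta score evaluated per class above, assuming the
# standard formulation
# F_beta = (1 + beta^2) * precision * recall / (beta^2 * precision + recall);
# for beta = 1 this reduces to the harmonic mean of precision and recall. The
# helper is illustrative only and is not collected by pytest.
def _fbeta_sketch():
    precision, recall, beta = 0.6, 0.75, 1
    fbeta = (1 + beta**2) * precision * recall / (
        beta**2 * precision + recall)
    assert np.isclose(fbeta, 2 / (1 / precision + 1 / recall))

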
def test_metrics():
    pred_size = (10, 30, 30)
    num_classes = 19
    ignore_index = 255
    results = np.random.randint(0, num_classes, size=pred_size)
    label = np.random.randint(0, num_classes, size=pred_size)

    # Test the availability of arg: ignore_index.
    label[:, 2, 5:10] = ignore_index

    # Test the correctness of the implementation of mIoU calculation.
    ret_metrics = eval_metrics(
        results, label, num_classes, ignore_index, metrics='mIoU')
    all_acc, acc, iou = ret_metrics['aAcc'], ret_metrics['Acc'], ret_metrics[
        'IoU']
    all_acc_l, acc_l, iou_l = legacy_mean_iou(results, label, num_classes,
                                              ignore_index)
    assert np.allclose(all_acc, all_acc_l)
    assert np.allclose(acc, acc_l)
    assert np.allclose(iou, iou_l)
    # Test the correctness of the implementation of mDice calculation.
    ret_metrics = eval_metrics(
        results, label, num_classes, ignore_index, metrics='mDice')
    all_acc, acc, dice = ret_metrics['aAcc'], ret_metrics['Acc'], ret_metrics[
        'Dice']
    all_acc_l, acc_l, dice_l = legacy_mean_dice(results, label, num_classes,
                                                ignore_index)
    assert np.allclose(all_acc, all_acc_l)
    assert np.allclose(acc, acc_l)
    assert np.allclose(dice, dice_l)
    # Test the correctness of the implementation of mFscore calculation.
    ret_metrics = eval_metrics(
        results, label, num_classes, ignore_index, metrics='mFscore')
    all_acc, recall, precision, fscore = ret_metrics['aAcc'], ret_metrics[
        'Recall'], ret_metrics['Precision'], ret_metrics['Fscore']
    all_acc_l, recall_l, precision_l, fscore_l = legacy_mean_fscore(
        results, label, num_classes, ignore_index)
    assert np.allclose(all_acc, all_acc_l)
    assert np.allclose(recall, recall_l)
    assert np.allclose(precision, precision_l)
    assert np.allclose(fscore, fscore_l)
    # Test the correctness of the implementation of joint calculation.
    ret_metrics = eval_metrics(
        results,
        label,
        num_classes,
        ignore_index,
        metrics=['mIoU', 'mDice', 'mFscore'])
    all_acc, acc, iou, dice, precision, recall, fscore = ret_metrics[
        'aAcc'], ret_metrics['Acc'], ret_metrics['IoU'], ret_metrics[
            'Dice'], ret_metrics['Precision'], ret_metrics[
                'Recall'], ret_metrics['Fscore']
    assert np.allclose(all_acc, all_acc_l)
    assert np.allclose(acc, acc_l)
    assert np.allclose(iou, iou_l)
    assert np.allclose(dice, dice_l)
    assert np.allclose(precision, precision_l)
    assert np.allclose(recall, recall_l)
    assert np.allclose(fscore, fscore_l)

    # Test the correctness of calculation when arg: num_classes is larger
    # than the maximum value of input maps. Classes that never appear get a
    # NaN score, which nan_to_num replaces (see the sketch after this test).
    results = np.random.randint(0, 5, size=pred_size)
    label = np.random.randint(0, 4, size=pred_size)
    ret_metrics = eval_metrics(
        results,
        label,
        num_classes,
        ignore_index=255,
        metrics='mIoU',
        nan_to_num=-1)
    all_acc, acc, iou = ret_metrics['aAcc'], ret_metrics['Acc'], ret_metrics[
        'IoU']
    assert acc[-1] == -1
    assert iou[-1] == -1

    ret_metrics = eval_metrics(
        results,
        label,
        num_classes,
        ignore_index=255,
        metrics='mDice',
        nan_to_num=-1)
    all_acc, acc, dice = ret_metrics['aAcc'], ret_metrics['Acc'], ret_metrics[
        'Dice']
    assert acc[-1] == -1
    assert dice[-1] == -1

    ret_metrics = eval_metrics(
        results,
        label,
        num_classes,
        ignore_index=255,
        metrics='mFscore',
        nan_to_num=-1)
    all_acc, precision, recall, fscore = ret_metrics['aAcc'], ret_metrics[
        'Precision'], ret_metrics['Recall'], ret_metrics['Fscore']
    assert precision[-1] == -1
    assert recall[-1] == -1
    assert fscore[-1] == -1

    ret_metrics = eval_metrics(
        results,
        label,
        num_classes,
        ignore_index=255,
        metrics=['mDice', 'mIoU', 'mFscore'],
        nan_to_num=-1)
    all_acc, acc, iou, dice, precision, recall, fscore = ret_metrics[
        'aAcc'], ret_metrics['Acc'], ret_metrics['IoU'], ret_metrics[
            'Dice'], ret_metrics['Precision'], ret_metrics[
                'Recall'], ret_metrics['Fscore']
    assert acc[-1] == -1
    assert dice[-1] == -1
    assert iou[-1] == -1
    assert precision[-1] == -1
    assert recall[-1] == -1
    assert fscore[-1] == -1

    # Test the bug which is caused by torch.histc.
    # torch.histc: https://pytorch.org/docs/stable/generated/torch.histc.html
    # When the arg: bins is set to the same value as arg: max,
    # some channels of mIoU may be nan.
    results = np.array([np.repeat(31, 59)])
    label = np.array([np.arange(59)])
    num_classes = 59
    ret_metrics = eval_metrics(
        results, label, num_classes, ignore_index=255, metrics='mIoU')
    all_acc, acc, iou = ret_metrics['aAcc'], ret_metrics['Acc'], ret_metrics[
        'IoU']
    assert not np.any(np.isnan(iou))


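# A short sketch of the nan_to_num behaviour exercised above (illustrative
# helper, not collected by pytest): a class that never appears in either the
# prediction or the ground truth yields 0 / 0 = NaN, and replacing NaN with a
# sentinel such as -1 is what makes the plain equality checks on the last
# class possible.
def _nan_to_num_sketch():
    iou = np.array([0.5, np.nan])
    assert np.all(np.nan_to_num(iou, nan=-1.) == np.array([0.5, -1.]))

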
def test_mean_iou():
    pred_size = (10, 30, 30)
    num_classes = 19
    ignore_index = 255
    results = np.random.randint(0, num_classes, size=pred_size)
    label = np.random.randint(0, num_classes, size=pred_size)
    label[:, 2, 5:10] = ignore_index
    ret_metrics = mean_iou(results, label, num_classes, ignore_index)
    all_acc, acc, iou = ret_metrics['aAcc'], ret_metrics['Acc'], ret_metrics[
        'IoU']
    all_acc_l, acc_l, iou_l = legacy_mean_iou(results, label, num_classes,
                                              ignore_index)
    assert np.allclose(all_acc, all_acc_l)
    assert np.allclose(acc, acc_l)
    assert np.allclose(iou, iou_l)

    results = np.random.randint(0, 5, size=pred_size)
    label = np.random.randint(0, 4, size=pred_size)
    ret_metrics = mean_iou(
        results, label, num_classes, ignore_index=255, nan_to_num=-1)
    all_acc, acc, iou = ret_metrics['aAcc'], ret_metrics['Acc'], ret_metrics[
        'IoU']
    assert acc[-1] == -1
    assert iou[-1] == -1


def test_mean_dice():
    pred_size = (10, 30, 30)
    num_classes = 19
    ignore_index = 255
    results = np.random.randint(0, num_classes, size=pred_size)
    label = np.random.randint(0, num_classes, size=pred_size)
    label[:, 2, 5:10] = ignore_index
    ret_metrics = mean_dice(results, label, num_classes, ignore_index)
    all_acc, acc, dice = ret_metrics['aAcc'], ret_metrics['Acc'], ret_metrics[
        'Dice']
    all_acc_l, acc_l, dice_l = legacy_mean_dice(results, label, num_classes,
                                                ignore_index)
    assert np.allclose(all_acc, all_acc_l)
    assert np.allclose(acc, acc_l)
    assert np.allclose(dice, dice_l)

    results = np.random.randint(0, 5, size=pred_size)
    label = np.random.randint(0, 4, size=pred_size)
    ret_metrics = mean_dice(
        results, label, num_classes, ignore_index=255, nan_to_num=-1)
    all_acc, acc, dice = ret_metrics['aAcc'], ret_metrics['Acc'], ret_metrics[
        'Dice']
    assert acc[-1] == -1
    assert dice[-1] == -1


def test_mean_fscore():
    pred_size = (10, 30, 30)
    num_classes = 19
    ignore_index = 255
    results = np.random.randint(0, num_classes, size=pred_size)
    label = np.random.randint(0, num_classes, size=pred_size)
    label[:, 2, 5:10] = ignore_index
    ret_metrics = mean_fscore(results, label, num_classes, ignore_index)
    all_acc, recall, precision, fscore = ret_metrics['aAcc'], ret_metrics[
        'Recall'], ret_metrics['Precision'], ret_metrics['Fscore']
    all_acc_l, recall_l, precision_l, fscore_l = legacy_mean_fscore(
        results, label, num_classes, ignore_index)
    assert np.allclose(all_acc, all_acc_l)
    assert np.allclose(recall, recall_l)
    assert np.allclose(precision, precision_l)
    assert np.allclose(fscore, fscore_l)

    ret_metrics = mean_fscore(
        results, label, num_classes, ignore_index, beta=2)
    all_acc, recall, precision, fscore = ret_metrics['aAcc'], ret_metrics[
        'Recall'], ret_metrics['Precision'], ret_metrics['Fscore']
    all_acc_l, recall_l, precision_l, fscore_l = legacy_mean_fscore(
        results, label, num_classes, ignore_index, beta=2)
    assert np.allclose(all_acc, all_acc_l)
    assert np.allclose(recall, recall_l)
    assert np.allclose(precision, precision_l)
    assert np.allclose(fscore, fscore_l)

    results = np.random.randint(0, 5, size=pred_size)
    label = np.random.randint(0, 4, size=pred_size)
    ret_metrics = mean_fscore(
        results, label, num_classes, ignore_index=255, nan_to_num=-1)
    all_acc, recall, precision, fscore = ret_metrics['aAcc'], ret_metrics[
        'Recall'], ret_metrics['Precision'], ret_metrics['Fscore']
    assert recall[-1] == -1
    assert precision[-1] == -1
    assert fscore[-1] == -1


def test_filename_inputs():
    import tempfile

    import cv2

    def save_arr(input_arrays: list, title: str, is_image: bool, dir: str):
        filenames = []
        SUFFIX = '.png' if is_image else '.npy'
        for idx, arr in enumerate(input_arrays):
            filename = '{}/{}-{}{}'.format(dir, title, idx, SUFFIX)
            if is_image:
                cv2.imwrite(filename, arr)
            else:
                np.save(filename, arr)
            filenames.append(filename)
        return filenames

    pred_size = (10, 30, 30)
    num_classes = 19
    ignore_index = 255
    results = np.random.randint(0, num_classes, size=pred_size)
    labels = np.random.randint(0, num_classes, size=pred_size)
    labels[:, 2, 5:10] = ignore_index

    with tempfile.TemporaryDirectory() as temp_dir:

        result_files = save_arr(results, 'pred', False, temp_dir)
        label_files = save_arr(labels, 'label', True, temp_dir)

        ret_metrics = eval_metrics(
            result_files,
            label_files,
            num_classes,
            ignore_index,
            metrics='mIoU')
        all_acc, acc, iou = ret_metrics['aAcc'], ret_metrics[
            'Acc'], ret_metrics['IoU']
        all_acc_l, acc_l, iou_l = legacy_mean_iou(results, labels, num_classes,
                                                  ignore_index)
        assert np.allclose(all_acc, all_acc_l)
        assert np.allclose(acc, acc_l)
        assert np.allclose(iou, iou_l)