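- """Evaluation utilities: mIoU, PQ, and mAP for Replica and ScanNet renderings
- produced by TensoRF-, SAM-, and Semantic-NeRF-based pipelines."""
-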
- import glob
- import os
-
- import configargparse
- import cv2
- import json
- import h5py
- from tqdm import tqdm
- import torch
- import torch.nn.functional as F
-
- from PIL import Image
- import numpy as np
-
- from utils.metrics import ConfusionMatrix, calculate_ap
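-
- # ConfusionMatrix and calculate_ap are defined in utils.metrics (not shown in
- # this file). A minimal sketch of the ConfusionMatrix interface assumed below,
- # with rows = prediction and columns = ground truth (matching the fp / fn axes
- # used by the PQ functions):
- #
- #   class ConfusionMatrix:
- #       def __init__(self, num_classes):
- #           self.num_classes = num_classes
- #           self.confusion_matrix = np.zeros((num_classes, num_classes), np.int64)
- #
- #       def add_batch(self, pred, gt, return_miou=False):
- #           idx = (pred.reshape(-1).astype(np.int64) * self.num_classes
- #                  + gt.reshape(-1))
- #           self.confusion_matrix += np.bincount(
- #               idx, minlength=self.num_classes ** 2
- #           ).reshape(self.num_classes, self.num_classes)
- #           if return_miou:
- #               tp = np.diag(self.confusion_matrix)
- #               union = (self.confusion_matrix.sum(1)
- #                        + self.confusion_matrix.sum(0) - tp)
- #               return np.nanmean(tp / np.maximum(union, 1))
- #
- #       def reset(self):
- #           self.confusion_matrix[:] = 0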
-
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-
- def cal_miou_replica_tensorf(gt_dir_path, pred_dir_path, ins2label_path, scene_id, mode='test'):
-     with open(ins2label_path, 'r') as f:
-         ins2label = json.load(f)['replica'][scene_id]
- num_sem_classes = len(ins2label)
-
- scene_id_simple = scene_id.replace('_', '')
-
- ins_rgbs_path = os.path.join(gt_dir_path, scene_id, 'ins_rgb.hdf5')
- with h5py.File(ins_rgbs_path, 'r') as f:
- ins_rgbs = f['datasets'][:].astype(np.uint8)
-
- img_total_num = len(glob.glob(os.path.join(gt_dir_path, scene_id, 'semantic_instance', 'semantic_instance_*.png')))
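-     # frames are grouped in windows of 5: 'train' takes the first frame of
-     # each window, 'test' the middle one (offset 5 // 2 == 2)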
- if mode == 'train':
- img_indices = list(range(0, img_total_num, 5))
- else:
- img_indices = list(range(5 // 2, img_total_num, 5))
-
- miou_result_desc = ''
-
- # for sem_interval in [1, 2, 5, 10, 20]:
- for sem_interval in [20]:
- sparse_ratio = 1 - 1 / sem_interval
-
- gt_sem_maps = []
- pred_sem_maps = []
- for i in tqdm(img_indices):
- # prediction
- pred_idx = int(i // 5)
- pred_image_path = os.path.join(pred_dir_path, scene_id,
- # f'{scene_id_simple}_sem_MLPFea_complexersemmat_pca64_interval_{sem_interval}_tinyvoxel',
- # f'{scene_id_simple}_sem_MLPFea_complexersemmat_warmup_interval_{sem_interval}_tinyvoxel',
- 'ablate_SDF', f'{scene_id_simple}_interval{sem_interval}_insPE',
- 'imgs_vis', 'sem',
- f'049999_{pred_idx:03d}.png')
- pred_color_sem_map = np.array(Image.open(pred_image_path))
- h, w, _ = pred_color_sem_map.shape
- pred_color_sem_map = pred_color_sem_map.reshape(h*w, 3)
- pred_sem_maps.append(pred_color_sem_map)
- # gt
- gt_image_path = os.path.join(gt_dir_path, scene_id, 'semantic_instance', f'semantic_instance_{i}.png')
- gt_sem_map = np.array(Image.open(gt_image_path))
- gt_sem_map = gt_sem_map.reshape(h*w, 1)
- gt_sem_maps.append(gt_sem_map)
-
- '''remap gt'''
- gt_sem_maps = np.stack(gt_sem_maps, 0) # (num_imgs, h*w, 1)
- gt_label_maps = np.zeros_like(gt_sem_maps) # (num_imgs, h*w, 1)
-
- unique_sem_maps = np.unique(gt_sem_maps)
-
- for sem_map in unique_sem_maps:
- # todo: add judgement
- gt_label_maps[gt_sem_maps == sem_map] = ins2label[str(sem_map.item())]
-
- '''remap prediction'''
- pred_sem_maps = np.stack(pred_sem_maps, 0) # (num_imgs, h*w, 3)
- pred_label_maps = np.zeros_like(gt_label_maps) # (num_imgs, h*w, 1)
-
- for color_label_idx, color_label in enumerate(ins_rgbs):
- for idx, image in tqdm(enumerate(pred_sem_maps), desc=f'Remapping {color_label_idx}th color label'):
- color_label_idx_in_pred = np.where((image == color_label).all(1))[0]
-
- pred_label_maps[idx][color_label_idx_in_pred] = color_label_idx
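-
-         # NOTE (sketch, not used): the nested loops above cost
-         # O(num_labels * num_imgs * num_pixels). An equivalent vectorized
-         # remap, assuming ins_rgbs has shape (num_labels, 3) uint8:
-         #
-         #   def remap_colors_to_labels(color_map, ins_rgbs, fill=0):
-         #       # color_map: (num_pixels, 3) uint8 -> (num_pixels,) int64
-         #       matches = (color_map[:, None, :] == ins_rgbs[None, :, :]).all(-1)
-         #       labels = np.full(color_map.shape[0], fill, dtype=np.int64)
-         #       hit = matches.any(1)
-         #       labels[hit] = matches[hit].argmax(1)
-         #       return labels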
-
- '''miou'''
- metric_ious = []
- val_cm = ConfusionMatrix(num_classes=num_sem_classes)
-
- '''avg on imgs'''
- # for (pred_label_map, gt_label_map) in tqdm(zip(pred_label_maps, gt_label_maps)):
- # metric_iou = val_cm.add_batch(pred_label_map, gt_label_map, return_miou=True)
- # metric_ious.append(metric_iou)
- # print(metric_iou)
-
- '''avg on pixels'''
- pred_label_maps_flatten, gt_label_maps_flatten = pred_label_maps.reshape(-1, 1), gt_label_maps.reshape(-1, 1)
- metric_iou = val_cm.add_batch(pred_label_maps_flatten, gt_label_maps_flatten, return_miou=True)
- metric_ious.append(metric_iou)
-
- miou = np.mean(np.array(metric_ious))
- print(f"sparse_ratio = {sparse_ratio * 100}%, miou = {miou}")
- miou_result_desc += f"sparse_ratio = {sparse_ratio * 100}%, miou = {miou}\n"
-
- # with open(f'{pred_dir_path}/miou_{scene_id}_39999.txt', 'w') as f:
- with open(f'{pred_dir_path}/miou_{scene_id}_49999.txt', 'w') as f:
- f.write(miou_result_desc)
-
-
- def cal_pq_replica_tensorf(gt_dir_path, pred_dir_path, ins2label_path, scene_id, mode='test'):
-     with open(ins2label_path, 'r') as f:
-         ins2label = json.load(f)['replica'][scene_id]
- num_sem_classes = len(ins2label)
-
- # scene_id_simple = scene_id.replace('_', '')
-
- ins_rgbs_path = os.path.join(gt_dir_path, scene_id, 'ins_rgb.hdf5')
- with h5py.File(ins_rgbs_path, 'r') as f:
- ins_rgbs = f['datasets'][:].astype(np.uint8)
-
- img_total_num = len(glob.glob(os.path.join(gt_dir_path, scene_id, 'semantic_instance', 'semantic_instance_*.png')))
- if mode == 'train':
- img_indices = list(range(0, img_total_num, 5))
- else:
- img_indices = list(range(5 // 2, img_total_num, 5))
-
- pq_result_desc = ''
- for sem_interval in [1, 2, 5, 10, 20]:
- sparse_ratio = 1 - 1 / sem_interval
-
- gt_sem_maps = []
- pred_sem_maps = []
- for i in tqdm(img_indices):
- # prediction
- pred_idx = int(i // 5)
- # pred_image_path = os.path.join(pred_dir_path, scene_id,
- # # f'{scene_id_simple}_sem_MLPFea_complexersemmat_pca64_interval_{sem_interval}_tinyvoxel',
- # # f'{scene_id_simple}_sem_MLPFea_complexersemmat_warmup_interval_{sem_interval}_tinyvoxel',
- # 'ablate_SDF', f'{scene_id_simple}_interval{sem_interval}_insPE',
- # 'imgs_vis', 'sem',
- # f'049999_{pred_idx:03d}.png')
- if os.path.exists(os.path.join(pred_dir_path, f'{scene_id}_far48_stretch7_unfreezesemfeabasis_useconv_interval{sem_interval}')):
- pred_image_path = os.path.join(pred_dir_path,
- f'{scene_id}_far48_stretch7_unfreezesemfeabasis_useconv_interval{sem_interval}',
- 'imgs_vis', 'ins',
- f'039999_{pred_idx:03d}.png')
- elif os.path.exists(os.path.join(pred_dir_path, f'{scene_id}_far48_stretch5_5_unfreezesemfeabasis_useconv_interval{sem_interval}')):
- pred_image_path = os.path.join(pred_dir_path,
- f'{scene_id}_far48_stretch5_5_unfreezesemfeabasis_useconv_interval{sem_interval}',
- 'imgs_vis', 'ins',
- f'039999_{pred_idx:03d}.png')
- elif os.path.exists(os.path.join(pred_dir_path, f'{scene_id}_far55_stretch7_unfreezesemfeabasis_useconv_newconv_interval{sem_interval}')):
- pred_image_path = os.path.join(pred_dir_path,
- f'{scene_id}_far55_stretch7_unfreezesemfeabasis_useconv_newconv_interval{sem_interval}',
- 'imgs_vis', 'ins',
- f'039999_{pred_idx:03d}.png')
- elif os.path.exists(os.path.join(pred_dir_path, f'{scene_id}_interval{sem_interval}')):
- pred_image_path = os.path.join(pred_dir_path,
- f'{scene_id}_interval{sem_interval}',
- 'imgs_vis', 'ins',
- f'039999_{pred_idx:03d}.png')
- else:
-                 raise FileNotFoundError(f'no prediction directory found for {scene_id} with interval {sem_interval}')
-
- pred_color_sem_map = np.array(Image.open(pred_image_path))
- h, w, _ = pred_color_sem_map.shape
- pred_color_sem_map = pred_color_sem_map.reshape(h*w, 3)
- pred_sem_maps.append(pred_color_sem_map)
- # gt
- gt_image_path = os.path.join(gt_dir_path, scene_id, 'semantic_instance', f'semantic_instance_{i}.png')
- gt_sem_map = np.array(Image.open(gt_image_path))
- gt_sem_map = gt_sem_map.reshape(h*w, 1)
- gt_sem_maps.append(gt_sem_map)
-
- '''remap gt'''
- gt_sem_maps = np.stack(gt_sem_maps, 0) # (num_imgs, h*w, 1)
- gt_label_maps = np.zeros_like(gt_sem_maps) # (num_imgs, h*w, 1)
-
- unique_sem_maps = np.unique(gt_sem_maps)
-
- for sem_map in unique_sem_maps:
- # todo: add judgement
- gt_label_maps[gt_sem_maps == sem_map] = ins2label[str(sem_map.item())]
-
- '''remap prediction'''
- pred_sem_maps = np.stack(pred_sem_maps, 0) # (num_imgs, h*w, 3)
- pred_label_maps = np.zeros_like(gt_label_maps) # (num_imgs, h*w, 1)
-
- for color_label_idx, color_label in enumerate(ins_rgbs):
- for idx, image in tqdm(enumerate(pred_sem_maps), desc=f'Remapping {color_label_idx}th color label'):
- color_label_idx_in_pred = np.where((image == color_label).all(1))[0]
-
- pred_label_maps[idx][color_label_idx_in_pred] = color_label_idx
-
- '''pq'''
- metric_pq = []
-
- val_cm = ConfusionMatrix(num_classes=num_sem_classes)
- for (pred_label_map, gt_label_map) in tqdm(zip(pred_label_maps, gt_label_maps)):
- # fill with gt
- ignore_label_mask = (pred_label_map == -1)
- pred_label_map[ignore_label_mask] = gt_label_map[ignore_label_mask]
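-             # pixels left unassigned (-1) are filled from gt before scoring;
-             # pred_label_maps was zero-initialized above, so this mask is empty
-             # here and the fill only takes effect in the SAM variants below,
-             # which initialize predictions with -1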
-
- val_cm.add_batch(pred_label_map, gt_label_map, return_miou=False)
- confusion_matrix = val_cm.confusion_matrix
- IoU_sum = np.divide(np.diag(confusion_matrix), (
- np.sum(confusion_matrix, axis=1) + np.sum(confusion_matrix, axis=0) - np.diag(
- confusion_matrix)))
-
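-             # per-class panoptic quality from confusion-matrix pixel counts:
-             # PQ_c = IoU_c * TP_c / (TP_c + 0.5 * FP_c + 0.5 * FN_c), i.e. a
-             # segmentation-quality (IoU) term times an F1-style recognition
-             # term; np.nanmean below averages over the classes present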
- tp = np.diag(confusion_matrix)
- fp = np.sum(confusion_matrix, axis=1) - np.diag(confusion_matrix)
- fn = np.sum(confusion_matrix, axis=0) - np.diag(confusion_matrix)
- pq = (IoU_sum * tp) / (tp + 0.5 * fp + 0.5 * fn)
- pq = np.nanmean(pq)
-
- val_cm.reset()
-
- metric_pq.append(pq)
-
- mpq = np.mean(np.array(metric_pq))
- print(f"sparse_ratio = {sparse_ratio * 100}%, pq = {mpq}")
- pq_result_desc += f"sparse_ratio = {sparse_ratio * 100}%, pq = {mpq}\n"
-
- with open(f'{pred_dir_path}/pq_{scene_id}.txt', 'w') as f:
- f.write(pq_result_desc)
-
-
- def cal_miou_replica_tensorf_ablation(gt_dir_path, pred_dir_path, ins2label_path, scene_id, mode='test'):
-     with open(ins2label_path, 'r') as f:
-         ins2label = json.load(f)['replica'][scene_id]
- num_semantic_classes = len(ins2label)
-
- scene_id_simple = scene_id.replace('_', '')
-
- # ins_rgbs_path = os.path.join(gt_dir_path, scene_id, 'ins_rgb.hdf5')
- # with h5py.File(ins_rgbs_path, 'r') as f:
- # ins_rgbs = f['datasets'][:].astype(np.uint8)
-
- img_total_num = len(glob.glob(os.path.join(gt_dir_path, scene_id, 'semantic_instance', 'semantic_instance_*.png')))
- if mode == 'train':
- img_indices = list(range(0, img_total_num, 5))
- else:
- img_indices = list(range(5 // 2, img_total_num, 5))
-
- miou_result_desc = ''
-
- sem_interval = 20
- sparse_ratio = 1 - 1 / sem_interval
-
- gt_sem_maps = []
- pred_label_maps = []
- for i in tqdm(img_indices):
- # prediction
- pred_idx = int(i // 5)
- pred_image_path = os.path.join(pred_dir_path,
- f'{scene_id_simple}_sem_MLPFea_complexersemmat_warmup_interval_{sem_interval}_tinyvoxel_norawfeas',
- 'imgs_vis', 'sem', f'039999_label_{pred_idx:03d}.png')
-         pred_label_map = np.array(Image.open(pred_image_path))  # stored as label ids, not colors
-         h, w = pred_label_map.shape
-         pred_label_map = pred_label_map.reshape(h*w)
-         pred_label_maps.append(pred_label_map)
- # gt
- gt_image_path = os.path.join(gt_dir_path, scene_id, 'semantic_instance', f'semantic_instance_{i}.png')
- gt_sem_map = np.array(Image.open(gt_image_path))
- gt_sem_map = gt_sem_map.reshape(h*w, 1)
- gt_sem_maps.append(gt_sem_map)
-
- '''remap gt'''
- gt_sem_maps = np.stack(gt_sem_maps, 0) # (num_imgs, h*w, 1)
- gt_label_maps = np.zeros_like(gt_sem_maps) # (num_imgs, h*w, 1)
-
- unique_sem_maps = np.unique(gt_sem_maps)
-
- for sem_map in unique_sem_maps:
- # todo: add judgement
- gt_label_maps[gt_sem_maps == sem_map] = ins2label[str(sem_map.item())]
-
- '''remap prediction'''
- pred_label_maps = np.stack(pred_label_maps, 0) # (num_imgs, h*w)
-
- # for color_label_idx, color_label in enumerate(ins_rgbs):
- # for idx, image in tqdm(enumerate(pred_label_maps), desc=f'Remapping {color_label_idx}th color label'):
- # color_label_idx_in_pred = np.where((image == color_label).all(1))[0]
- #
- # pred_label_maps[idx][color_label_idx_in_pred] = color_label_idx
-
- '''miou'''
- metric_ious = []
- val_cm = ConfusionMatrix(num_classes=num_semantic_classes)
-
- '''avg on imgs'''
- # for (pred_label_map, gt_label_map) in tqdm(zip(pred_label_maps, gt_label_maps)):
- # metric_iou = val_cm.add_batch(pred_label_map, gt_label_map, return_miou=True)
- # metric_ious.append(metric_iou)
- # print(metric_iou)
-
- '''avg on pixels'''
- pred_label_maps_flatten, gt_label_maps_flatten = pred_label_maps.reshape(-1, 1), gt_label_maps.reshape(-1, 1)
- metric_iou = val_cm.add_batch(pred_label_maps_flatten, gt_label_maps_flatten, return_miou=True)
- metric_ious.append(metric_iou)
-
- miou = np.mean(np.array(metric_ious))
- print(f"sparse_ratio = {sparse_ratio * 100}%, miou = {miou}")
- miou_result_desc += f"sparse_ratio = {sparse_ratio * 100}%, miou = {miou}\n"
-
- with open(f'{pred_dir_path}/miou_{scene_id}_ablation.txt', 'w') as f:
- f.write(miou_result_desc)
-
-
- def cal_ap_replica_tensorf(gt_dir_path, pred_dir_path, ins2label_path, scene_id, thre_list=[0.5, 0.75], mode='test'):
-     with open(ins2label_path, 'r') as f:
-         ins2label = json.load(f)['replica'][scene_id]
- num_sem_classes = len(ins2label)
-
- scene_id_simple = scene_id.replace('_', '')
-
- ins_rgbs_path = os.path.join(gt_dir_path, scene_id, 'ins_rgb.hdf5')
- with h5py.File(ins_rgbs_path, 'r') as f:
- ins_rgbs = f['datasets'][:].astype(np.uint8)
-
- img_total_num = len(glob.glob(os.path.join(gt_dir_path, scene_id, 'semantic_instance', 'semantic_instance_*.png')))
- if mode == 'train':
- img_indices = list(range(0, img_total_num, 5))
- else:
- img_indices = list(range(5 // 2, img_total_num, 5))
-
- map_result_desc = ''
-
- # for sem_interval in [1, 2, 5, 10, 20]:
- for sem_interval in [20]:
- sparse_ratio = 1 - 1 / sem_interval
- gt_sem_maps = []
- pred_sem_maps = []
-
- pred_logit_dir_path = os.path.join(pred_dir_path, scene_id,
- # f'{scene_id_simple}_sem_MLPFea_complexersemmat_pca64_interval_{sem_interval}_tinyvoxel',
- f'{scene_id_simple}_sem_MLPFea_complexersemmat_warmup_interval_{sem_interval}_tinyvoxel',
- 'imgs_vis', 'sem')
- # pred_logit_path = os.path.join(pred_logit_dir_path, f'{scene_id}', 'ablate_SDF',
- # f'{scene_id_simple}_interval{sem_interval}_insPE',
- # 'imgs_vis', 'sem')
- pred_logit_paths = glob.glob(os.path.join(pred_logit_dir_path, '049999_logit_*.npy'))
- all_ap = []
-
- for i in tqdm(img_indices):
- # prediction
- pred_idx = int(i // 5)
- # pred_logit_path = os.path.join(pred_dir_path, scene_id,
- # # f'{scene_id_simple}_sem_MLPFea_complexersemmat_pca64_interval_{sem_interval}_tinyvoxel',
- # f'{scene_id_simple}_sem_MLPFea_complexersemmat_warmup_interval_{sem_interval}_tinyvoxel',
- # 'imgs_vis', 'sem', f'049999_{pred_idx:03d}.png')
- # pred_logit_path = os.path.join(pred_dir_path, f'{scene_id}', 'ablate_SDF',
- # f'{scene_id_simple}_interval{sem_interval}_insPE',
- # 'imgs_vis', 'sem',
- # f'049999_logit_{pred_idx:03d}.npy')
-
- if len(pred_logit_paths) > 0:
- pred_logit = np.load(os.path.join(pred_logit_dir_path, f'049999_logit_{pred_idx:03d}.npy'))
- pred_logit = F.softmax(torch.from_numpy(pred_logit), dim=-1).numpy()
-
- pred_conf_mask = np.max(pred_logit, axis=-1)
- pred_label_map = np.argmax(pred_logit, axis=-1)
- unique_pred_labels = np.unique(pred_label_map)
- H, W = pred_label_map.shape
- pred_label_map = pred_label_map.reshape(-1, 1)
-
- gt_image_path = os.path.join(gt_dir_path, scene_id, 'semantic_instance', f'semantic_instance_{i}.png')
- gt_sem_map = np.array(Image.open(gt_image_path))
- gt_sem_map = gt_sem_map.reshape(-1, 1)
- gt_label_map = np.zeros_like(gt_sem_map, dtype=np.int64)
- unique_sem_map = np.unique(gt_sem_map)
- for sem_map in unique_sem_map:
- # todo: add judgement
-                     gt_label_map[gt_sem_map == sem_map] = ins2label[str(sem_map.item())]
- # gt_label_map = gt_label_map.reshape(H, W)
- unique_gt_labels = np.unique(gt_label_map)
- num_valid_labels = len(unique_gt_labels)
-
- val_cm = ConfusionMatrix(num_classes=num_sem_classes)
- val_cm.add_batch(pred_label_map, gt_label_map, return_miou=False)
- confusion_matrix = val_cm.confusion_matrix
-
- siou = np.divide(np.diag(confusion_matrix), (np.sum(confusion_matrix, axis=1) + np.sum(confusion_matrix, axis=0)
- - np.diag(confusion_matrix) + 1e-6))
-
- iou_metrics = siou[unique_gt_labels]
-
- '''confidence values'''
- # prepare confidence values
- unique_pred_labels, unique_gt_labels = torch.from_numpy(unique_pred_labels), torch.from_numpy(unique_gt_labels)
- pred_label_map = torch.from_numpy(pred_label_map).reshape(H, W)
- pred_conf_mask = torch.from_numpy(pred_conf_mask)
- conf_scores = torch.zeros_like(unique_gt_labels, dtype=torch.float32)
-                 for label_idx, label in enumerate(unique_gt_labels):  # not `i`: avoid shadowing the image index
-                     if label.item() in unique_pred_labels:
-                         index = torch.where(pred_label_map == label)
-                         ssm = pred_conf_mask[index[0], index[1]]  # confidence value
-                         pred_obj_conf = torch.median(ssm).item()  # median confidence value for one object
-                         conf_scores[label_idx] = pred_obj_conf
-
- iou_metrics = torch.from_numpy(iou_metrics).to(device)
- conf_scores = conf_scores.to(device)
-
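-                 # calculate_ap (from utils.metrics, not shown) is assumed to
-                 # return one AP value per IoU threshold in thre_list, scoring
-                 # each gt label as a detection ranked by its median confidence;
-                 # 'integral' presumably selects the area-under-PR-curve variant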
- ap = calculate_ap(iou_metrics, num_valid_labels, thre_list, device, confidence=conf_scores, function_select='integral')
- all_ap.append(ap)
- else:
- pred_image_path = os.path.join(pred_logit_dir_path, f'049999_{pred_idx:03d}.png')
- pred_color_sem_map = np.array(Image.open(pred_image_path))
- h, w, _ = pred_color_sem_map.shape
- pred_color_sem_map = pred_color_sem_map.reshape(h*w, 3)
- pred_sem_maps.append(pred_color_sem_map)
- # gt
- gt_image_path = os.path.join(gt_dir_path, scene_id, 'semantic_instance', f'semantic_instance_{i}.png')
- gt_sem_map = np.array(Image.open(gt_image_path))
- gt_sem_map = gt_sem_map.reshape(h*w, 1)
- gt_sem_maps.append(gt_sem_map)
-
- if len(pred_logit_paths) == 0:
- '''remap gt'''
- gt_sem_maps = np.stack(gt_sem_maps, 0) # (num_imgs, h*w, 1)
- gt_label_maps = np.zeros_like(gt_sem_maps) # (num_imgs, h*w, 1)
-
- unique_sem_maps = np.unique(gt_sem_maps)
-
- for sem_map in unique_sem_maps:
- # todo: add judgement
- gt_label_maps[gt_sem_maps == sem_map] = ins2label[str(sem_map.item())]
-
- '''remap prediction'''
- pred_sem_maps = np.stack(pred_sem_maps, 0) # (num_imgs, h*w, 3)
- pred_label_maps = np.zeros_like(gt_label_maps) # (num_imgs, h*w, 1)
-
- for color_label_idx, color_label in enumerate(ins_rgbs):
- for idx, image in tqdm(enumerate(pred_sem_maps), desc=f'Remapping {color_label_idx}th color label'):
- color_label_idx_in_pred = np.where((image == color_label).all(1))[0]
-
- pred_label_maps[idx][color_label_idx_in_pred] = color_label_idx
-
-             for pred_label_map, gt_label_map in zip(pred_label_maps, gt_label_maps):
-                 # a fresh per-image confusion matrix, as in the logit branch above
-                 val_cm = ConfusionMatrix(num_classes=num_sem_classes)
- unique_gt_labels = np.unique(gt_label_map)
- num_valid_labels = len(unique_gt_labels)
-
- val_cm.add_batch(pred_label_map, gt_label_map, return_miou=False)
- confusion_matrix = val_cm.confusion_matrix
-
- siou = np.divide(np.diag(confusion_matrix),
- (np.sum(confusion_matrix, axis=1) + np.sum(confusion_matrix, axis=0)
- - np.diag(confusion_matrix) + 1e-6))
-
- iou_metrics = siou[unique_gt_labels]
- iou_metrics = torch.from_numpy(iou_metrics).to(device)
-
- ap = calculate_ap(iou_metrics, num_valid_labels, thre_list, device, function_select='integral')
- all_ap.append(ap)
-
- all_ap_array = np.array(all_ap)
- mean_ap = np.mean(all_ap_array, axis=0)
- print(mean_ap)
- map_result_desc += f'sparse_ratio = {sparse_ratio * 100}%, '
- for i, thre in enumerate(thre_list):
- map_result_desc += f'map@{thre} = {mean_ap[i]}, '
- if i == len(thre_list) - 1:
- map_result_desc += '\n'
-
- # with open(f'{pred_dir_path}/map_{scene_id}.txt', 'w') as f:
- with open(f'{pred_dir_path}/map_{scene_id}_distill.txt', 'w') as f:
- f.write(map_result_desc)
-
-
- def cal_miou_scannet_tensorf(gt_dir_path, pred_dir_path, scene_id, resize=True, mode='test'):
- scene_id_simple = scene_id.replace('_', '')
- img_indices = np.loadtxt(os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode+'_split.txt')).astype(np.int16)
- miou_result_desc = ''
-
-     for sem_interval in [1, 2, 5, 10, 20]:
-     # for sem_interval in [1, 2, 5]:
-     # for sem_interval in [10, 20]:
-     # for sem_interval in [20]:
-         sparse_ratio = 1 - 1 / sem_interval
- gt_label_maps = []
- pred_label_maps = []
-
- for i in tqdm(range(len(img_indices))):
- # prediction
- pred_idx = i
- pred_image_path = os.path.join(pred_dir_path, scene_id,
-                                            f'{scene_id_simple}_sem_MLPFea_complexersemmat_warmup_interval_{sem_interval}_tinyvoxel',
-                                            # f'{scene_id_simple}_sem_MLPFea_complexersemmat_warmup_interval_{sem_interval}_tinyvoxel_raw40000_sem170000',
- 'imgs_vis', 'sem',
- f'099999_label_{pred_idx:03d}.png')
- # f'079999_label_{pred_idx:03d}.png')
- # f'119999_label_{pred_idx:03d}.png')
- # f'199999_label_{pred_idx:03d}.png')
- pred_label_map = np.array(Image.open(pred_image_path))
- h, w = pred_label_map.shape
- pred_label_map = pred_label_map.reshape(h*w, 1)
- pred_label_maps.append(pred_label_map)
-
- # gt
- gt_idx = img_indices[i]
- gt_image_path = os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode, mode+'_ins_full', f'{gt_idx}.png')
- gt_label_map = np.array(Image.open(gt_image_path))
- if resize:
- gt_label_map = cv2.resize(gt_label_map, (w, h), interpolation=cv2.INTER_NEAREST) # (h, w)
- gt_label_map = gt_label_map.reshape(h*w, 1)
- gt_label_maps.append(gt_label_map)
-
- gt_label_maps = np.stack(gt_label_maps, 0)
- pred_label_maps = np.stack(pred_label_maps, 0)
- ckpt = torch.load(os.path.join(pred_dir_path, scene_id, 'rawfeas',
- f'{scene_id_simple}_rawfeas_tinyvoxel',
- f'{scene_id_simple}_rawfeas_tinyvoxel.pth'),
- map_location='cpu')
- num_semantic_classes = ckpt['kwargs']['sem_dim']
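-         # the checkpoint is loaded only to read sem_dim (the number of
-         # semantic classes); it does not depend on sem_interval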
-
- '''miou'''
- metric_ious = []
- val_cm = ConfusionMatrix(num_classes=num_semantic_classes)
-
- '''avg on imgs'''
- # for (pred_label_map, gt_label_map) in tqdm(zip(pred_label_maps, gt_label_maps)):
- # metric_iou = val_cm.add_batch(pred_label_map, gt_label_map, return_miou=True)
- # metric_ious.append(metric_iou)
- # # print(metric_iou)
-
- '''avg on pixels'''
- pred_label_maps_flatten, gt_label_maps_flatten = pred_label_maps.reshape(-1, 1), gt_label_maps.reshape(-1, 1)
- metric_iou = val_cm.add_batch(pred_label_maps_flatten, gt_label_maps_flatten, return_miou=True)
- metric_ious.append(metric_iou)
-
- miou = np.mean(np.array(metric_ious))
- print(f"sparse_ratio = {sparse_ratio * 100}%, miou = {miou}")
- miou_result_desc += f"sparse_ratio = {sparse_ratio * 100}%, miou = {miou}\n"
-
- with open(f'{pred_dir_path}/miou_{scene_id_simple}.txt', 'w') as f:
- # with open(f'{pred_dir_path}/miou_{scene_id_simple}_20_again.txt', 'w') as f:
- # with open(f'{pred_dir_path}/miou_{scene_id_simple}_10.txt', 'w') as f:
- # with open(f'{pred_dir_path}/miou_{scene_id_simple}_10_select.txt', 'w') as f:
- f.write(miou_result_desc)
-
-
- def cal_pq_scannet_tensorf(gt_dir_path, pred_dir_path, scene_id, resize=True, mode='test'):
- scene_id_simple = scene_id.replace('_', '')
- img_indices = np.loadtxt(os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode+'_split.txt')).astype(np.int16)
-
- ckpt = torch.load(os.path.join(pred_dir_path, scene_id, 'rawfeas',
- f'{scene_id_simple}_rawfeas_tinyvoxel',
- f'{scene_id_simple}_rawfeas_tinyvoxel.pth'),
- map_location='cpu')
- num_semantic_classes = ckpt['kwargs']['sem_dim']
-
- pq_result_desc = ''
-
-     for sem_interval in [1, 2, 5, 10, 20]:
-     # for sem_interval in [1, 2, 5]:
-     # for sem_interval in [10, 20]:
-     # for sem_interval in [20]:
-         sparse_ratio = 1 - 1 / sem_interval
- gt_label_maps = []
- pred_label_maps = []
-
-         pred_dir_path_sparse = os.path.join(pred_dir_path, scene_id,
-                                             f'{scene_id_simple}_sem_MLPFea_complexersemmat_warmup_interval_{sem_interval}_tinyvoxel')
-
- for i in tqdm(range(len(img_indices))):
- # prediction
- pred_idx = i
- if os.path.exists(
- os.path.join(pred_dir_path_sparse, 'imgs_vis', 'sem', f'099999_label_{pred_idx:03d}.png')):
- pred_image_path = os.path.join(pred_dir_path_sparse, 'imgs_vis', 'sem', f'099999_label_{pred_idx:03d}.png')
- elif os.path.exists(
- os.path.join(pred_dir_path_sparse, 'imgs_vis', 'sem', f'079999_label_{pred_idx:03d}.png')):
- pred_image_path = os.path.join(pred_dir_path_sparse, 'imgs_vis', 'sem',
- f'079999_label_{pred_idx:03d}.png')
- elif os.path.exists(
- os.path.join(pred_dir_path_sparse, 'imgs_vis', 'sem', f'119999_label_{pred_idx:03d}.png')):
- pred_image_path = os.path.join(pred_dir_path_sparse, 'imgs_vis', 'sem',
- f'119999_label_{pred_idx:03d}.png')
- elif os.path.exists(
- os.path.join(pred_dir_path_sparse, 'imgs_vis', 'sem', f'199999_label_{pred_idx:03d}.png')):
- pred_image_path = os.path.join(pred_dir_path_sparse, 'imgs_vis', 'sem',
- f'199999_label_{pred_idx:03d}.png')
- else:
-                 raise FileNotFoundError(f'no rendered label image found for index {pred_idx} under {pred_dir_path_sparse}')
-
- pred_label_map = np.array(Image.open(pred_image_path))
- h, w = pred_label_map.shape
- pred_label_map = pred_label_map.reshape(h*w, 1)
- pred_label_maps.append(pred_label_map)
-
- # gt
- gt_idx = img_indices[i]
- gt_image_path = os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode, mode+'_ins_full', f'{gt_idx}.png')
- gt_label_map = np.array(Image.open(gt_image_path))
- if resize:
- gt_label_map = cv2.resize(gt_label_map, (w, h), interpolation=cv2.INTER_NEAREST) # (h, w)
- gt_label_map = gt_label_map.reshape(h*w, 1)
- gt_label_maps.append(gt_label_map)
-
- gt_label_maps = np.stack(gt_label_maps, 0)
- pred_label_maps = np.stack(pred_label_maps, 0)
-
- '''pq'''
- metric_pq = []
-
- '''avg on imgs'''
- val_cm = ConfusionMatrix(num_classes=num_semantic_classes)
- for (pred_label_map, gt_label_map) in tqdm(zip(pred_label_maps, gt_label_maps)):
- # fill with gt
- ignore_label_mask = (pred_label_map == -1)
- pred_label_map[ignore_label_mask] = gt_label_map[ignore_label_mask]
-
- val_cm.add_batch(pred_label_map, gt_label_map, return_miou=False)
- confusion_matrix = val_cm.confusion_matrix
- IoU_sum = np.divide(np.diag(confusion_matrix), (
- np.sum(confusion_matrix, axis=1) + np.sum(confusion_matrix, axis=0) - np.diag(
- confusion_matrix)))
-
- tp = np.diag(confusion_matrix)
- fp = np.sum(confusion_matrix, axis=1) - np.diag(confusion_matrix)
- fn = np.sum(confusion_matrix, axis=0) - np.diag(confusion_matrix)
- pq = (IoU_sum * tp) / (tp + 0.5 * fp + 0.5 * fn)
- pq = np.nanmean(pq)
-
- val_cm.reset()
-
- metric_pq.append(pq)
-
- mpq = np.mean(np.array(metric_pq))
- print(f"sparse_ratio = {sparse_ratio * 100}%, pq = {mpq}")
- pq_result_desc += f"sparse_ratio = {sparse_ratio * 100}%, pq = {mpq * 100}\n"
-
- with open(f'{pred_dir_path}/pq_{scene_id_simple}.txt', 'w') as f:
- f.write(pq_result_desc)
-
-
- def cal_miou_replica_sam(gt_dir_path, pred_dir_path, ins2label_path, scene_id, mode='test'):
-     with open(ins2label_path, 'r') as f:
-         ins2label = json.load(f)['replica'][scene_id]
- num_sem_classes = len(ins2label)
-
- scene_id_simple = scene_id.replace('_', '')
-
- ins_rgbs_path = os.path.join(gt_dir_path, scene_id, 'ins_rgb.hdf5')
- with h5py.File(ins_rgbs_path, 'r') as f:
- ins_rgbs = f['datasets'][:].astype(np.uint8)
-
- img_total_num = len(glob.glob(os.path.join(gt_dir_path, scene_id, 'semantic_instance', 'semantic_instance_*.png')))
- if mode == 'train':
- img_indices = list(range(0, img_total_num, 5))
- else:
- img_indices = list(range(5 // 2, img_total_num, 5))
-
- gt_sem_maps = []
- pred_sem_maps = []
- for i in tqdm(img_indices):
- # prediction
- pred_idx = int(i // 5)
- pred_image_path = os.path.join(pred_dir_path,
- # f'{scene_id_simple}_vitb_grid32_min0_intersection',
- # f'{scene_id_simple}_vitl_grid32_min0_intersection',
- # f'{scene_id_simple}_vith_grid16_min0_intersection',
- f'{scene_id_simple}_vith_grid32_min0_intersection',
- # f'{scene_id_simple}_vith_grid32_min5000_intersection',
- # f'{scene_id_simple}_vith_grid64_min0_intersection',
- # f'{scene_id_simple}_vith_grid64_min50_intersection',
- # f'{scene_id_simple}_vith_grid64_min500_intersection',
- # f'{scene_id_simple}_vith_grid64_min5000_intersection',
- f'{pred_idx:03d}_ins.png')
- pred_color_sem_map = np.array(Image.open(pred_image_path))
- h, w, _ = pred_color_sem_map.shape
- pred_color_sem_map = pred_color_sem_map.reshape(h*w, 3)
- pred_sem_maps.append(pred_color_sem_map)
- # gt
- gt_image_path = os.path.join(gt_dir_path, scene_id, 'semantic_instance', f'semantic_instance_{i}.png')
- gt_sem_map = np.array(Image.open(gt_image_path))
- gt_sem_map = gt_sem_map.reshape(h*w, 1)
- gt_sem_maps.append(gt_sem_map)
-
- '''remap gt'''
- gt_sem_maps = np.stack(gt_sem_maps, 0) # (num_imgs, h*w, 1)
- gt_label_maps = np.zeros_like(gt_sem_maps) # (num_imgs, h*w, 1)
-
- unique_sem_maps = np.unique(gt_sem_maps)
-
- for sem_map in unique_sem_maps:
- # todo: add judgement
- gt_label_maps[gt_sem_maps == sem_map] = ins2label[str(sem_map.item())]
-
- '''remap prediction'''
- pred_sem_maps = np.stack(pred_sem_maps, 0) # (num_imgs, h*w, 3)
-     pred_label_maps = np.full_like(gt_label_maps, -1, dtype=np.int16)  # (num_imgs, h*w, 1); signed dtype so the -1 marker survives
-
- for color_label_idx, color_label in enumerate(ins_rgbs):
- for idx, image in tqdm(enumerate(pred_sem_maps), desc=f'Remapping {color_label_idx}th color label'):
- color_label_idx_in_pred = np.where((image == color_label).all(1))[0]
-
- pred_label_maps[idx][color_label_idx_in_pred] = color_label_idx
-
- '''miou'''
- metric_ious = []
- val_cm = ConfusionMatrix(num_classes=num_sem_classes)
-
- '''avg on imgs'''
- # for (pred_label_map, gt_label_map) in tqdm(zip(pred_label_maps, gt_label_maps)):
- # '''fill with gt'''
- # ignore_label_mask = (pred_label_map == -1)
- # pred_label_map[ignore_label_mask] = gt_label_map[ignore_label_mask]
- #
- # '''ignore'''
- # # valid_label_mask = (pred_label_map != -1)
- # # pred_label_map, gt_label_map = pred_label_map[valid_label_mask], gt_label_map[valid_label_mask]
- #
- # metric_iou = val_cm.add_batch(pred_label_map, gt_label_map, return_miou=True)
- # metric_ious.append(metric_iou)
-
- '''avg on pixels'''
- pred_label_maps_flatten, gt_label_maps_flatten = pred_label_maps.reshape(-1, 1), gt_label_maps.reshape(-1, 1)
- '''fill with gt'''
- # ignore_label_mask = (pred_label_maps_flatten == -1)
- # pred_label_maps_flatten[ignore_label_mask] = gt_label_maps_flatten[ignore_label_mask]
-
- '''ignore'''
- valid_label_mask = (pred_label_maps_flatten != -1)
- pred_label_maps_flatten, gt_label_maps_flatten = pred_label_maps_flatten[valid_label_mask], gt_label_maps_flatten[valid_label_mask]
-
- metric_iou = val_cm.add_batch(pred_label_maps_flatten, gt_label_maps_flatten, return_miou=True)
- metric_ious.append(metric_iou)
-
- miou = np.mean(np.array(metric_ious))
- print(f"miou = {miou}")
-
- miou_result_desc = f"miou = {miou}\n"
- with open(f'{pred_dir_path}/miou_{scene_id_simple}.txt', 'w') as f:
- f.write(miou_result_desc)
-
-
- def cal_pq_replica_sam(gt_dir_path, pred_dir_path, ins2label_path, scene_id, mode='test'):
-     with open(ins2label_path, 'r') as f:
-         ins2label = json.load(f)['replica'][scene_id]
- num_sem_classes = len(ins2label)
-
- scene_id_simple = scene_id.replace('_', '')
-
- ins_rgbs_path = os.path.join(gt_dir_path, scene_id, 'ins_rgb.hdf5')
- with h5py.File(ins_rgbs_path, 'r') as f:
- ins_rgbs = f['datasets'][:].astype(np.uint8)
-
- img_total_num = len(glob.glob(os.path.join(gt_dir_path, scene_id, 'semantic_instance', 'semantic_instance_*.png')))
- if mode == 'train':
- img_indices = list(range(0, img_total_num, 5))
- else:
- img_indices = list(range(5 // 2, img_total_num, 5))
-
- gt_sem_maps = []
- pred_sem_maps = []
- for i in tqdm(img_indices):
- # prediction
- pred_idx = int(i // 5)
- pred_image_path = os.path.join(pred_dir_path,
- # f'{scene_id_simple}_vitb_grid32_min0_intersection',
- # f'{scene_id_simple}_vitl_grid32_min0_intersection',
- # f'{scene_id_simple}_vith_grid16_min0_intersection',
- f'{scene_id_simple}_vith_grid32_min0_intersection',
- # f'{scene_id_simple}_vith_grid32_min5000_intersection',
- # f'{scene_id_simple}_vith_grid64_min0_intersection',
- # f'{scene_id_simple}_vith_grid64_min50_intersection',
- # f'{scene_id_simple}_vith_grid64_min500_intersection',
- # f'{scene_id_simple}_vith_grid64_min5000_intersection',
- f'{pred_idx:03d}_ins.png')
- pred_color_sem_map = np.array(Image.open(pred_image_path))
- h, w, _ = pred_color_sem_map.shape
- pred_color_sem_map = pred_color_sem_map.reshape(h*w, 3)
- pred_sem_maps.append(pred_color_sem_map)
- # gt
- gt_image_path = os.path.join(gt_dir_path, scene_id, 'semantic_instance', f'semantic_instance_{i}.png')
- gt_sem_map = np.array(Image.open(gt_image_path))
- gt_sem_map = gt_sem_map.reshape(h*w, 1)
- gt_sem_maps.append(gt_sem_map)
-
- '''remap gt'''
- gt_sem_maps = np.stack(gt_sem_maps, 0) # (num_imgs, h*w, 1)
- gt_label_maps = np.zeros_like(gt_sem_maps) # (num_imgs, h*w, 1)
-
- unique_sem_maps = np.unique(gt_sem_maps)
-
- for sem_map in unique_sem_maps:
- # todo: add judgement
- gt_label_maps[gt_sem_maps == sem_map] = ins2label[str(sem_map.item())]
-
- '''remap prediction'''
- pred_sem_maps = np.stack(pred_sem_maps, 0) # (num_imgs, h*w, 3)
-     pred_label_maps = np.full_like(gt_label_maps, -1, dtype=np.int16)  # (num_imgs, h*w, 1); signed dtype so the -1 marker survives
-
- for color_label_idx, color_label in enumerate(ins_rgbs):
- for idx, image in tqdm(enumerate(pred_sem_maps), desc=f'Remapping {color_label_idx}th color label'):
- color_label_idx_in_pred = np.where((image == color_label).all(1))[0]
-
- pred_label_maps[idx][color_label_idx_in_pred] = color_label_idx
-
- '''pq'''
- metric_pq = []
-
- '''avg on imgs'''
- val_cm = ConfusionMatrix(num_classes=num_sem_classes)
- for (pred_label_map, gt_label_map) in tqdm(zip(pred_label_maps, gt_label_maps)):
- '''fill with gt'''
- ignore_label_mask = (pred_label_map == -1)
- pred_label_map[ignore_label_mask] = gt_label_map[ignore_label_mask]
-
- '''ignore'''
- # valid_label_mask = (pred_label_map != -1)
- # pred_label_map, gt_label_map = pred_label_map[valid_label_mask], gt_label_map[valid_label_mask]
-
- val_cm.add_batch(pred_label_map, gt_label_map, return_miou=False)
- confusion_matrix = val_cm.confusion_matrix
- IoU_sum = np.divide(np.diag(confusion_matrix), (
- np.sum(confusion_matrix, axis=1) + np.sum(confusion_matrix, axis=0) - np.diag(
- confusion_matrix)))
-
- tp = np.diag(confusion_matrix)
- fp = np.sum(confusion_matrix, axis=1) - np.diag(confusion_matrix)
- fn = np.sum(confusion_matrix, axis=0) - np.diag(confusion_matrix)
- pq = (IoU_sum * tp) / (tp + 0.5 * fp + 0.5 * fn)
- pq = np.nanmean(pq)
-
- val_cm.reset()
-
- metric_pq.append(pq)
-
- mpq = np.mean(np.array(metric_pq))
- print(f"pq = {mpq}")
- pq_result_desc = f"pq = {mpq}\n"
-
- with open(f'{pred_dir_path}/pq_{scene_id_simple}.txt', 'w') as f:
- f.write(pq_result_desc)
-
-
- def cal_miou_scannet_sam(gt_dir_path, pred_dir_path, scene_id, resize=True, mode='test'):
- scene_id_simple = scene_id.replace('_', '')
-
- ins_rgbs_path = os.path.join(gt_dir_path, f'{scene_id_simple}_00', 'ins_rgb.hdf5')
- with h5py.File(ins_rgbs_path, 'r') as f:
- ins_rgbs = (f['datasets'][:] * 255).astype(np.uint8)
-
- img_indices = np.loadtxt(os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode+'_split.txt')).astype(np.int16)
-
- # ckpt = torch.load(os.path.join(pred_dir_path, scene_id,
- ckpt = torch.load(os.path.join('/data/dzy_data/nerf/ICANN2023/tensorf', scene_id,
- 'rawfeas', f'{scene_id_simple}_rawfeas_tinyvoxel',
- f'{scene_id_simple}_rawfeas_tinyvoxel.pth'),
- map_location='cpu')
- num_sem_classes = ckpt['kwargs']['sem_dim']
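-     # NOTE: the checkpoint is loaded only to read sem_dim; the path above is
-     # hardcoded rather than derived from pred_dir_path (see the commented line)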
-
- gt_label_maps = []
- pred_sem_maps = []
- for i in tqdm(range(len(img_indices))):
- # prediction
- pred_idx = i
- pred_image_path = os.path.join(pred_dir_path,
- f'{scene_id_simple}_vith_grid32_min0_intersection',
- f'{pred_idx:03d}_ins.png')
- pred_color_sem_map = np.array(Image.open(pred_image_path))
- h, w, _ = pred_color_sem_map.shape
- pred_color_sem_map = pred_color_sem_map.reshape(h*w, 3)
- pred_sem_maps.append(pred_color_sem_map)
- # gt
- gt_idx = img_indices[i]
- gt_image_path = os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode, mode + '_ins_full', f'{gt_idx}.png')
- gt_label_map = np.array(Image.open(gt_image_path))
- if resize:
- gt_label_map = cv2.resize(gt_label_map, (w, h), interpolation=cv2.INTER_NEAREST) # (h, w)
- gt_label_map = gt_label_map.reshape(h * w, 1)
- gt_label_maps.append(gt_label_map)
-
- gt_label_maps = np.stack(gt_label_maps, 0) # (num_imgs, h*w, 1)
-
- '''remap prediction'''
- pred_sem_maps = np.stack(pred_sem_maps, 0) # (num_imgs, h*w, 3)
- pred_label_maps = np.full_like(gt_label_maps, -1, dtype=np.int16) # (num_imgs, h*w, 1)
-
- for color_label_idx in range(num_sem_classes):
- color_label = ins_rgbs[color_label_idx]
- for idx, image in tqdm(enumerate(pred_sem_maps), desc=f'Remapping {color_label_idx}th color label'):
- color_label_idx_in_pred = np.where((image == color_label).all(1))[0]
-
- pred_label_maps[idx][color_label_idx_in_pred] = color_label_idx
-
- '''miou'''
- metric_ious = []
- val_cm = ConfusionMatrix(num_classes=num_sem_classes)
-
- '''avg on imgs'''
- for (pred_label_map, gt_label_map) in tqdm(zip(pred_label_maps, gt_label_maps)):
- '''fill with gt'''
- ignore_label_mask = (pred_label_map == -1)
- pred_label_map[ignore_label_mask] = gt_label_map[ignore_label_mask]
-
- '''ignore'''
- # valid_label_mask = (pred_label_map != -1)
- # pred_label_map, gt_label_map = pred_label_map[valid_label_mask], gt_label_map[valid_label_mask]
-
- metric_iou = val_cm.add_batch(pred_label_map, gt_label_map, return_miou=True)
- metric_ious.append(metric_iou)
-
- '''avg on pixels'''
- # pred_label_maps_flatten, gt_label_maps_flatten = pred_label_maps.reshape(-1, 1), gt_label_maps.reshape(-1, 1)
- # '''fill with gt'''
- # # ignore_label_mask = (pred_label_maps_flatten == -1)
- # # pred_label_maps_flatten[ignore_label_mask] = gt_label_maps_flatten[ignore_label_mask]
- #
- # '''ignore'''
- # valid_label_mask = (pred_label_maps_flatten != -1)
- # pred_label_maps_flatten, gt_label_maps_flatten = pred_label_maps_flatten[valid_label_mask], gt_label_maps_flatten[valid_label_mask]
- #
- # metric_iou = val_cm.add_batch(pred_label_maps_flatten, gt_label_maps_flatten, return_miou=True)
- # metric_ious.append(metric_iou)
-
- miou = np.mean(np.array(metric_ious))
- print(f"miou = {miou}")
-
- miou_result_desc = f"miou = {miou}\n"
- with open(f'{pred_dir_path}/miou_{scene_id_simple}.txt', 'w') as f:
- f.write(miou_result_desc)
-
-
- def cal_pq_scannet_sam(gt_dir_path, pred_dir_path, scene_id, resize=True, mode='test'):
- scene_id_simple = scene_id.replace('_', '')
-
- ins_rgbs_path = os.path.join(gt_dir_path, f'{scene_id_simple}_00', 'ins_rgb.hdf5')
- with h5py.File(ins_rgbs_path, 'r') as f:
- ins_rgbs = (f['datasets'][:] * 255).astype(np.uint8)
-
- img_indices = np.loadtxt(os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode+'_split.txt')).astype(np.int16)
-
- # ckpt = torch.load(os.path.join(pred_dir_path, scene_id,
- ckpt = torch.load(os.path.join('/data/dzy_data/nerf/ICANN2023/tensorf', scene_id,
- 'rawfeas', f'{scene_id_simple}_rawfeas_tinyvoxel',
- f'{scene_id_simple}_rawfeas_tinyvoxel.pth'),
- map_location='cpu')
- num_sem_classes = ckpt['kwargs']['sem_dim']
-
- gt_label_maps = []
- pred_sem_maps = []
- for i in tqdm(range(len(img_indices))):
- # prediction
- pred_idx = i
- pred_image_path = os.path.join(pred_dir_path,
- f'{scene_id_simple}_vith_grid32_min0_intersection',
- f'{pred_idx:03d}_ins.png')
- pred_color_sem_map = np.array(Image.open(pred_image_path))
- h, w, _ = pred_color_sem_map.shape
- pred_color_sem_map = pred_color_sem_map.reshape(h*w, 3)
- pred_sem_maps.append(pred_color_sem_map)
- # gt
- gt_idx = img_indices[i]
- gt_image_path = os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode, mode + '_ins_full', f'{gt_idx}.png')
- gt_label_map = np.array(Image.open(gt_image_path))
- if resize:
- gt_label_map = cv2.resize(gt_label_map, (w, h), interpolation=cv2.INTER_NEAREST) # (h, w)
- gt_label_map = gt_label_map.reshape(h * w, 1)
- gt_label_maps.append(gt_label_map)
-
- gt_label_maps = np.stack(gt_label_maps, 0) # (num_imgs, h*w, 1)
-
- '''remap prediction'''
- pred_sem_maps = np.stack(pred_sem_maps, 0) # (num_imgs, h*w, 3)
- pred_label_maps = np.full_like(gt_label_maps, -1, dtype=np.int16) # (num_imgs, h*w, 1)
-
- for color_label_idx in range(num_sem_classes):
- color_label = ins_rgbs[color_label_idx]
- for idx, image in tqdm(enumerate(pred_sem_maps), desc=f'Remapping {color_label_idx}th color label'):
- color_label_idx_in_pred = np.where((image == color_label).all(1))[0]
-
- pred_label_maps[idx][color_label_idx_in_pred] = color_label_idx
-
- '''pq'''
- metric_pq = []
-
- '''avg on imgs'''
- val_cm = ConfusionMatrix(num_classes=num_sem_classes)
- for (pred_label_map, gt_label_map) in tqdm(zip(pred_label_maps, gt_label_maps)):
- '''fill with gt'''
- ignore_label_mask = (pred_label_map == -1)
- pred_label_map[ignore_label_mask] = gt_label_map[ignore_label_mask]
-
- '''ignore'''
- # valid_label_mask = (pred_label_map != -1)
- # pred_label_map, gt_label_map = pred_label_map[valid_label_mask], gt_label_map[valid_label_mask]
-
- val_cm.add_batch(pred_label_map, gt_label_map, return_miou=False)
- confusion_matrix = val_cm.confusion_matrix
- IoU_sum = np.divide(np.diag(confusion_matrix), (
- np.sum(confusion_matrix, axis=1) + np.sum(confusion_matrix, axis=0) - np.diag(
- confusion_matrix)))
-
- tp = np.diag(confusion_matrix)
- fp = np.sum(confusion_matrix, axis=1) - np.diag(confusion_matrix)
- fn = np.sum(confusion_matrix, axis=0) - np.diag(confusion_matrix)
- pq = (IoU_sum * tp) / (tp + 0.5 * fp + 0.5 * fn)
- pq = np.nanmean(pq)
-
- val_cm.reset()
-
- metric_pq.append(pq)
-
- mpq = np.mean(np.array(metric_pq))
- pq_result_desc = f"pq = {mpq}\n"
-
- with open(f'{pred_dir_path}/pq_{scene_id_simple}.txt', 'w') as f:
- f.write(pq_result_desc)
-
-
- def cal_miou_replica_semnerf(gt_dir_path, pred_dir_path, scene_id, mode='test'):
- ins2label_path = os.path.join(gt_dir_path, 'color_dict.json')
-     with open(ins2label_path, 'r') as f:
-         ins2label = json.load(f)['replica'][scene_id]
- num_semantic_classes = len(ins2label)
-
- scene_id_simple = scene_id.replace('_', '')
-
- img_total_num = len(glob.glob(os.path.join(gt_dir_path, scene_id, 'semantic_instance', 'semantic_instance_*.png')))
- if mode == 'train':
- img_indices = list(range(0, img_total_num, 5))
- else:
- img_indices = list(range(5 // 2, img_total_num, 15))
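-         # step 15 = 3 * 5: only every 3rd test view is evaluated here, matching
-         # the img_indices[::3] subsampling in cal_ap_replica_semnerf below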
-
- miou_result_desc = ''
-
- for sem_interval in [1, 2, 5, 10, 20]:
- ratio = 1. / sem_interval
-
- gt_sem_maps = []
- pred_label_maps = []
-
- for i in tqdm(img_indices):
- # prediction
- pred_idx = int(i // 5)
-
- pred_label_path = os.path.join(pred_dir_path, f'{scene_id_simple}_P{(ratio):.2f}', 'step_000000',
- f'label_{pred_idx:03d}.png')
- pred_label_map = np.array(Image.open(pred_label_path))
- h, w = pred_label_map.shape
- pred_label_map = pred_label_map.reshape(h*w, 1)
- pred_label_maps.append(pred_label_map)
- # gt
- gt_image_path = os.path.join(gt_dir_path, scene_id, 'semantic_instance', f'semantic_instance_{i}.png')
- gt_sem_map = np.array(Image.open(gt_image_path))
- gt_sem_map = gt_sem_map.reshape(h*w, 1)
- gt_sem_maps.append(gt_sem_map)
-
- pred_label_maps = np.stack(pred_label_maps, 0) # (num_imgs, h*w, 1)
-
- '''remap gt'''
- gt_sem_maps = np.stack(gt_sem_maps, 0) # (num_imgs, h*w, 1)
- gt_label_maps = np.zeros_like(gt_sem_maps) # (num_imgs, h*w, 1)
-
- unique_sem_maps = np.unique(gt_sem_maps)
-
- for sem_map in unique_sem_maps:
- # todo: add judgement
- gt_label_maps[gt_sem_maps == sem_map] = ins2label[str(sem_map.item())]
-
- '''miou'''
- metric_ious = []
- val_cm = ConfusionMatrix(num_classes=num_semantic_classes)
-
- '''avg on imgs'''
- # for (pred_label_map, gt_label_map) in tqdm(zip(pred_label_maps, gt_label_maps)):
- # metric_iou = val_cm.add_batch(pred_label_map, gt_label_map, return_miou=True)
- # metric_ious.append(metric_iou)
- # print(metric_iou)
-
- '''avg on pixels'''
- pred_label_maps_flatten, gt_label_maps_flatten = pred_label_maps.reshape(-1, 1), gt_label_maps.reshape(-1, 1)
- metric_iou = val_cm.add_batch(pred_label_maps_flatten, gt_label_maps_flatten, return_miou=True)
- metric_ious.append(metric_iou)
-
- miou = np.mean(np.array(metric_ious))
- print(f"sparse_ratio = {(1-ratio) * 100}%, miou = {miou}")
- miou_result_desc += f"sparse_ratio = {(1-ratio) * 100}%, miou = {miou}\n"
-
- with open(f'{pred_dir_path}/miou_{scene_id}.txt', 'w') as f:
- f.write(miou_result_desc)
-
-
- def cal_ap_replica_semnerf(gt_dir_path, pred_dir_path, ins2label_path, scene_id, thre_list=[0.5, 0.75], mode='test'):
-     with open(ins2label_path, 'r') as f:
-         ins2label = json.load(f)['replica'][scene_id]
- num_sem_classes = len(ins2label)
-
- scene_id_simple = scene_id.replace('_', '')
-
- ins_rgbs_path = os.path.join(gt_dir_path, scene_id, 'ins_rgb.hdf5')
- with h5py.File(ins_rgbs_path, 'r') as f:
- ins_rgbs = f['datasets'][:].astype(np.uint8)
-
- img_total_num = len(glob.glob(os.path.join(gt_dir_path, scene_id, 'semantic_instance', 'semantic_instance_*.png')))
- if mode == 'train':
- img_indices = list(range(0, img_total_num, 5))
- else:
- img_indices = list(range(5 // 2, img_total_num, 5))
- img_indices = img_indices[::3]
-
- map_result_desc = ''
-
- for sem_interval in [1, 2, 5, 10, 20]:
- ratio = 1. / sem_interval
- sparse_ratio = 1 - 1. / sem_interval
- gt_sem_maps = []
- pred_label_maps = []
-
- pred_label_dir_path = os.path.join(pred_dir_path, f'{scene_id_simple}_P{(ratio):.2f}', 'step_000000')
- # pred_logit_path = os.path.join(pred_label_dir_path, f'{scene_id}', 'ablate_SDF',
- # f'{scene_id_simple}_interval{sem_interval}_insPE',
- # 'imgs_vis', 'sem')
- # pred_logit_paths = glob.glob(os.path.join(pred_label_dir_path, '049999_logit_*.npy'))
- all_ap = []
-
- for i in tqdm(img_indices):
- # prediction
- pred_idx = int(i // 5)
- # pred_logit_path = os.path.join(pred_dir_path, scene_id,
- # # f'{scene_id_simple}_sem_MLPFea_complexersemmat_pca64_interval_{sem_interval}_tinyvoxel',
- # f'{scene_id_simple}_sem_MLPFea_complexersemmat_warmup_interval_{sem_interval}_tinyvoxel',
- # 'imgs_vis', 'sem', f'049999_{pred_idx:03d}.png')
- # pred_logit_path = os.path.join(pred_dir_path, f'{scene_id}', 'ablate_SDF',
- # f'{scene_id_simple}_interval{sem_interval}_insPE',
- # 'imgs_vis', 'sem',
- # f'049999_logit_{pred_idx:03d}.npy')
-
- pred_label_path = os.path.join(pred_label_dir_path, f'label_{pred_idx:03d}.png')
- pred_label_map = np.array(Image.open(pred_label_path))
- h, w = pred_label_map.shape
- pred_label_map = pred_label_map.reshape(h*w, 1)
- pred_label_maps.append(pred_label_map)
- # gt
- gt_image_path = os.path.join(gt_dir_path, scene_id, 'semantic_instance', f'semantic_instance_{i}.png')
- gt_sem_map = np.array(Image.open(gt_image_path))
- gt_sem_map = gt_sem_map.reshape(h*w, 1)
- gt_sem_maps.append(gt_sem_map)
-
- '''remap gt'''
- gt_sem_maps = np.stack(gt_sem_maps, 0) # (num_imgs, h*w, 1)
- gt_label_maps = np.zeros_like(gt_sem_maps) # (num_imgs, h*w, 1)
-
- unique_sem_maps = np.unique(gt_sem_maps)
-
- for sem_map in unique_sem_maps:
- # todo: add judgement
- gt_label_maps[gt_sem_maps == sem_map] = ins2label[str(sem_map.item())]
-
- # '''remap prediction'''
- # pred_sem_maps = np.stack(pred_sem_maps, 0) # (num_imgs, h*w, 3)
- # pred_label_maps = np.zeros_like(gt_label_maps) # (num_imgs, h*w, 1)
- #
- # for color_label_idx, color_label in enumerate(ins_rgbs):
- # for idx, image in tqdm(enumerate(pred_sem_maps), desc=f'Remapping {color_label_idx}th color label'):
- # color_label_idx_in_pred = np.where((image == color_label).all(1))[0]
- #
- # pred_label_maps[idx][color_label_idx_in_pred] = color_label_idx
-
-         for pred_label_map, gt_label_map in zip(pred_label_maps, gt_label_maps):
-             unique_gt_labels = np.unique(gt_label_map)
-             num_valid_labels = len(unique_gt_labels)
-
-             # a fresh per-image confusion matrix, as in cal_ap_replica_tensorf
-             val_cm = ConfusionMatrix(num_classes=num_sem_classes)
-             val_cm.add_batch(pred_label_map, gt_label_map, return_miou=False)
- confusion_matrix = val_cm.confusion_matrix
-
- siou = np.divide(np.diag(confusion_matrix),
- (np.sum(confusion_matrix, axis=1) + np.sum(confusion_matrix, axis=0)
- - np.diag(confusion_matrix) + 1e-6))
-
- iou_metrics = siou[unique_gt_labels]
- iou_metrics = torch.from_numpy(iou_metrics).to(device)
-
- ap = calculate_ap(iou_metrics, num_valid_labels, thre_list, device, function_select='integral')
- all_ap.append(ap)
-
- all_ap_array = np.array(all_ap)
- mean_ap = np.mean(all_ap_array, axis=0)
- print(mean_ap)
- map_result_desc += f'sparse_ratio = {sparse_ratio * 100}%, '
- for i, thre in enumerate(thre_list):
- map_result_desc += f'map@{thre} = {mean_ap[i]}, '
- if i == len(thre_list) - 1:
- map_result_desc += '\n'
-
- with open(f'{pred_dir_path}/map_{scene_id}.txt', 'w') as f:
- # with open(f'{pred_dir_path}/map_{scene_id}_distill.txt', 'w') as f:
- f.write(map_result_desc)
-
-
- def cal_miou_scannet_semnerf(gt_dir_path, pred_dir_path, scene_id, resize=True, mode='test'):
- scene_id_simple = scene_id.replace('_', '')
- img_indices = np.loadtxt(os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode + '_split.txt')).astype(np.int16)
- miou_result_desc = ''
-
- ckpt = torch.load(os.path.join(pred_dir_path,
- f'{scene_id_simple}_rawfeas_tinyvoxel',
- f'{scene_id_simple}_rawfeas_tinyvoxel.pth'),
- map_location='cpu')
- num_semantic_classes = ckpt['kwargs']['sem_dim']
-
- for sem_interval in [1, 2, 5, 10, 20]:
- sparse_ratio = 1 - 1 / sem_interval
- gt_label_maps = []
- pred_label_maps = []
-
- for i in tqdm(range(len(img_indices))):
- # prediction
- pred_idx = i
- pred_image_path = os.path.join(pred_dir_path, f'{scene_id}_P{(1-sparse_ratio):.2f}', 'step_000000',
- f'label_{pred_idx:03d}.png')
- pred_label_map = np.array(Image.open(pred_image_path))
- h, w = pred_label_map.shape
- pred_label_map = pred_label_map.reshape(h*w, 1)
- pred_label_maps.append(pred_label_map)
-
- # gt
- gt_idx = img_indices[i]
- gt_image_path = os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode, mode+'_ins_full', f'{gt_idx}.png')
- gt_label_map = np.array(Image.open(gt_image_path))
- if resize:
- gt_label_map = cv2.resize(gt_label_map, (w, h), interpolation=cv2.INTER_NEAREST) # (h, w)
- gt_label_map = gt_label_map.reshape(h*w, 1)
- gt_label_maps.append(gt_label_map)
-
- gt_label_maps = np.stack(gt_label_maps, 0)
- pred_label_maps = np.stack(pred_label_maps, 0)
-
- '''miou'''
- metric_ious = []
- val_cm = ConfusionMatrix(num_classes=num_semantic_classes)
-
- '''avg on imgs'''
- # for (pred_label_map, gt_label_map) in tqdm(zip(pred_label_maps, gt_label_maps)):
- # metric_iou = val_cm.add_batch(pred_label_map, gt_label_map, return_miou=True)
- # metric_ious.append(metric_iou)
- # # print(metric_iou)
-
- '''avg on pixels'''
- pred_label_maps_flatten, gt_label_maps_flatten = pred_label_maps.reshape(-1, 1), gt_label_maps.reshape(-1, 1)
- metric_iou = val_cm.add_batch(pred_label_maps_flatten, gt_label_maps_flatten, return_miou=True)
- metric_ious.append(metric_iou)
-
- miou = np.mean(np.array(metric_ious))
- print(f"sparse_ratio = {sparse_ratio * 100}%, miou = {miou}")
- miou_result_desc += f"sparse_ratio = {sparse_ratio * 100}%, miou = {miou}\n"
-
- with open(f'{pred_dir_path}/miou_{scene_id_simple}.txt', 'w') as f:
- f.write(miou_result_desc)
-
-
- def cal_ap_scannet_semnerf(gt_dir_path, pred_dir_path, ins2label_path, scene_id, thre_list=[0.5, 0.75], resize=True, mode='test'):
- scene_id_simple = scene_id.replace('_', '')
- ckpt = torch.load(os.path.join(pred_dir_path,
- f'{scene_id_simple}_rawfeas_tinyvoxel',
- f'{scene_id_simple}_rawfeas_tinyvoxel.pth'),
-                            map_location='cpu')
- num_sem_classes = ckpt['kwargs']['sem_dim']
-
- # ins_rgbs_path = os.path.join(gt_dir_path, scene_id, 'ins_rgb.hdf5')
- # with h5py.File(ins_rgbs_path, 'r') as f:
- # ins_rgbs = f['datasets'][:].astype(np.uint8)
-
- img_indices = np.loadtxt(os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode + '_split.txt')).astype(np.int16)
- map_result_desc = ''
-
- for sem_interval in [20]:
- # for sem_interval in [1, 2, 5, 10, 20]:
- sparse_ratio = 1 - 1. / sem_interval
- gt_label_maps = []
- pred_label_maps = []
-
- # pred_label_dir_path = os.path.join(pred_dir_path, f'{scene_id}_P{(1-sparse_ratio):.2f}', 'step_000000', 'test_render')
- # pred_logit_path = os.path.join(pred_label_dir_path, f'{scene_id}', 'ablate_SDF',
- # f'{scene_id_simple}_interval{sem_interval}_insPE',
- # 'imgs_vis', 'sem')
- # pred_logit_paths = glob.glob(os.path.join(pred_label_dir_path, '049999_logit_*.npy'))
- all_ap = []
-
- pred_label_dir_path = '/data/dzy_data/nerf/models_zty/semantic_nerf-main/log/SSR_scene0012_0.05/test_render/step_000000/'
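-         # NOTE: hardcoded render directory for this scene; the commented
-         # pred_label_dir_path construction above is the generic form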
-
- for i in tqdm(range(len(img_indices))):
- # prediction
- pred_idx = 3 * i
- # pred_logit_path = os.path.join(pred_dir_path, scene_id,
- # # f'{scene_id_simple}_sem_MLPFea_complexersemmat_pca64_interval_{sem_interval}_tinyvoxel',
- # f'{scene_id_simple}_sem_MLPFea_complexersemmat_warmup_interval_{sem_interval}_tinyvoxel',
- # 'imgs_vis', 'sem', f'049999_{pred_idx:03d}.png')
- # pred_logit_path = os.path.join(pred_dir_path, f'{scene_id}', 'ablate_SDF',
- # f'{scene_id_simple}_interval{sem_interval}_insPE',
- # 'imgs_vis', 'sem',
- # f'049999_logit_{pred_idx:03d}.npy')
-
- pred_label_path = os.path.join(pred_label_dir_path, f'label_{pred_idx:03d}.png')
- pred_label_map = np.array(Image.open(pred_label_path))
- h, w = pred_label_map.shape
- pred_label_map = pred_label_map.reshape(h*w, 1)
- pred_label_maps.append(pred_label_map)
- # gt
- gt_idx = img_indices[i]
- gt_image_path = os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode, mode+'_ins_full', f'{gt_idx}.png')
- gt_label_map = np.array(Image.open(gt_image_path))
- if resize:
- gt_label_map = cv2.resize(gt_label_map, (w, h), interpolation=cv2.INTER_NEAREST) # (h, w)
- gt_label_map = gt_label_map.reshape(h*w, 1)
- gt_label_maps.append(gt_label_map)
-
- # '''remap gt'''
- # gt_sem_maps = np.stack(gt_sem_maps, 0) # (num_imgs, h*w, 1)
- # gt_label_maps = np.zeros_like(gt_sem_maps) # (num_imgs, h*w, 1)
- # unique_sem_maps = np.unique(gt_sem_maps)
- #
- # for sem_map in unique_sem_maps:
- # # todo: add judgement
- # gt_label_maps[gt_sem_maps == sem_map] = ins2label[str(sem_map.item())]
-
- # '''remap prediction'''
- # pred_sem_maps = np.stack(pred_sem_maps, 0) # (num_imgs, h*w, 3)
- # pred_label_maps = np.zeros_like(gt_label_maps) # (num_imgs, h*w, 1)
- #
- # for color_label_idx, color_label in enumerate(ins_rgbs):
- # for idx, image in tqdm(enumerate(pred_sem_maps), desc=f'Remapping {color_label_idx}th color label'):
- # color_label_idx_in_pred = np.where((image == color_label).all(1))[0]
- #
- # pred_label_maps[idx][color_label_idx_in_pred] = color_label_idx
- '''all label maps'''
- gt_label_maps = np.stack(gt_label_maps, 0)
- pred_label_maps = np.stack(pred_label_maps, 0)
-
- # val_cm = ConfusionMatrix(num_classes=num_sem_classes)
- for pred_idx in tqdm(range(len(pred_label_maps))):
- pred_label_map = pred_label_maps[pred_idx]
- gt_label_map = gt_label_maps[pred_idx]
- # unique_gt_labels = np.unique(gt_label_map)
- # num_valid_labels = len(unique_gt_labels)
- #
- # val_cm.add_batch(pred_label_path, gt_label_map, return_miou=False)
- # confusion_matrix = val_cm.confusion_matrix
- #
- # siou = np.divide(np.diag(confusion_matrix),
- # (np.sum(confusion_matrix, axis=1) + np.sum(confusion_matrix, axis=0)
- # - np.diag(confusion_matrix) + 1e-6))
- #
- # iou_metrics = siou[unique_gt_labels]
- # iou_metrics = torch.from_numpy(iou_metrics).to(device)
- '''pred'''
- pred_logit = np.load(os.path.join(pred_label_dir_path, f'logits_{pred_idx:03d}.npz'))['arr_0']
- pred_logit = F.softmax(torch.from_numpy(pred_logit), dim=-1).numpy()
-
- pred_conf_mask = np.max(pred_logit, axis=-1)
- pred_label_map = np.argmax(pred_logit, axis=-1)
- unique_pred_labels = np.unique(pred_label_map)
- H, W = pred_label_map.shape
- pred_label_map = pred_label_map.reshape(-1, 1)
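-             # the label PNG loaded in the first loop is superseded here by the
-             # argmax of the stored logits, which also provide the per-pixel
-             # confidences needed for AP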
-
- '''gt'''
- # gt_image_path = os.path.join(gt_dir_path, scene_id, 'semantic_instance', f'semantic_instance_{i}.png')
- # gt_sem_map = np.array(Image.open(gt_image_path))
- # gt_sem_map = gt_sem_map.reshape(-1, 1)
- # gt_label_map = np.zeros_like(gt_sem_map, dtype=np.int64)
- # unique_sem_map = np.unique(gt_sem_map)
- # for sem_map in unique_sem_map:
- # # todo: add judgement
- # gt_label_map[gt_sem_map == sem_map] = ins2label[str(sem_map)]
- # gt_label_map = gt_label_map.reshape(H, W)
- unique_gt_labels = np.unique(gt_label_map)
- num_valid_labels = len(unique_gt_labels)
-
- val_cm = ConfusionMatrix(num_classes=num_sem_classes)
- val_cm.add_batch(pred_label_map, gt_label_map, return_miou=False)
- confusion_matrix = val_cm.confusion_matrix
-
- siou = np.divide(np.diag(confusion_matrix),
- (np.sum(confusion_matrix, axis=1) + np.sum(confusion_matrix, axis=0)
- - np.diag(confusion_matrix) + 1e-6))
-
- iou_metrics = siou[unique_gt_labels]
-
- '''confidence values'''
- # prepare confidence values
- unique_pred_labels, unique_gt_labels = torch.from_numpy(unique_pred_labels), torch.from_numpy(
- unique_gt_labels)
- pred_label_map = torch.from_numpy(pred_label_map).reshape(H, W)
- pred_conf_mask = torch.from_numpy(pred_conf_mask)
- conf_scores = torch.zeros_like(unique_gt_labels, dtype=torch.float32)
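- # gt labels that the prediction never produces keep a confidence score of 0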
- for i, label in enumerate(unique_gt_labels):
- if label.item() in unique_pred_labels:
- index = torch.where(pred_label_map == label)
- ssm = pred_conf_mask[index[0], index[1]] # confidence value
- pred_obj_conf = torch.median(ssm).item() # median confidence value for one object
- conf_scores[i] = pred_obj_conf
-
- iou_metrics = torch.from_numpy(iou_metrics).to(device)
- conf_scores = conf_scores.to(device)
-
- ap = calculate_ap(iou_metrics, num_valid_labels, thre_list, device, confidence=conf_scores, function_select='integral')
- all_ap.append(ap)
-
- all_ap_array = np.array(all_ap)
- mean_ap = np.mean(all_ap_array, axis=0)
- print(mean_ap)
- map_result_desc += f'sparse_ratio = {sparse_ratio * 100}%, '
- for i, thre in enumerate(thre_list):
- map_result_desc += f'map@{thre} = {mean_ap[i]}, '
- if i == len(thre_list) - 1:
- map_result_desc += '\n'
-
- with open(f'{pred_dir_path}/map_{scene_id}.txt', 'w') as f:
- # with open(f'{pred_dir_path}/map_{scene_id}_distill.txt', 'w') as f:
- f.write(map_result_desc)
-
-
- def cal_pq_scannet_semnerf(gt_dir_path, pred_dir_path, scene_id, resize=True, mode='test'):
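- '''Confusion-matrix PQ evaluation of semnerf predictions on a ScanNet scene: per-view
- label maps are scored against (optionally resized) gt instance maps for each label
- sparsity ratio, and the mean PQ is printed and written to pq_<scene>.txt.'''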
- scene_id_simple = scene_id.replace('_', '')
- img_indices = np.loadtxt(os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode + '_split.txt')).astype(np.int16)
- pq_result_desc = ''
-
- ckpt = torch.load(os.path.join(pred_dir_path,
- f'{scene_id_simple}_rawfeas_tinyvoxel',
- f'{scene_id_simple}_rawfeas_tinyvoxel.pth'),
- map_location='cpu')
- num_semantic_classes = ckpt['kwargs']['sem_dim']
-
- for sem_interval in [1, 2, 5, 10, 20]:
- sparse_ratio = 1 - 1 / sem_interval
- gt_label_maps = []
- pred_label_maps = []
-
- for i in tqdm(range(len(img_indices))):
- # prediction
- pred_idx = i
- pred_image_path = os.path.join(pred_dir_path, f'{scene_id}_P{(1-sparse_ratio):.2f}', 'step_000000',
- f'label_{pred_idx:03d}.png')
- pred_label_map = np.array(Image.open(pred_image_path))
- h, w = pred_label_map.shape
- pred_label_map = pred_label_map.reshape(h*w, 1)
- pred_label_maps.append(pred_label_map)
-
- # gt
- gt_idx = img_indices[i]
- gt_image_path = os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode, mode+'_ins_full', f'{gt_idx}.png')
- gt_label_map = np.array(Image.open(gt_image_path))
- if resize:
- gt_label_map = cv2.resize(gt_label_map, (w, h), interpolation=cv2.INTER_NEAREST) # (h, w)
- gt_label_map = gt_label_map.reshape(h*w, 1)
- gt_label_maps.append(gt_label_map)
-
- gt_label_maps = np.stack(gt_label_maps, 0)
- pred_label_maps = np.stack(pred_label_maps, 0)
-
- '''pq'''
- metric_pq = []
-
- '''avg on imgs'''
- val_cm = ConfusionMatrix(num_classes=num_semantic_classes)
- for (pred_label_map, gt_label_map) in tqdm(zip(pred_label_maps, gt_label_maps)):
- val_cm.add_batch(pred_label_map, gt_label_map, return_miou=False)
- confusion_matrix = val_cm.confusion_matrix
- iou_per_class = np.divide(np.diag(confusion_matrix), (
- np.sum(confusion_matrix, axis=1) + np.sum(confusion_matrix, axis=0) - np.diag(
- confusion_matrix)))
-
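- # confusion-matrix surrogate for panoptic quality: per-class pixel counts stand in
- # for instance matches, so pq = iou_per_class * tp / (tp + fp/2 + fn/2); classes
- # absent from both maps yield nan and are dropped by nanmean below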
- tp = np.diag(confusion_matrix)
- fp = np.sum(confusion_matrix, axis=1) - np.diag(confusion_matrix)
- fn = np.sum(confusion_matrix, axis=0) - np.diag(confusion_matrix)
- pq = (iou_per_class * tp) / (tp + 0.5 * fp + 0.5 * fn)
- pq = np.nanmean(pq)
-
- val_cm.reset()
-
- metric_pq.append(pq)
-
- mpq = np.mean(np.array(metric_pq))
- print(f"sparse_ratio = {sparse_ratio * 100}%, pq = {mpq}")
- pq_result_desc += f"sparse_ratio = {sparse_ratio * 100}%, pq = {mpq}\n"
-
- with open(f'{pred_dir_path}/pq_{scene_id_simple}.txt', 'w') as f:
- f.write(pq_result_desc)
-
- # def intersection_over_union(pred_mask, gt_mask):
- # intersection = np.logical_and(pred_mask, gt_mask).sum()
- # union = np.logical_or(pred_mask, gt_mask).sum()
- # iou = intersection / union if union > 0 else 0.0
- # return iou
- #
- # def panoptic_quality(pred_segmentation,
- # # pred_class,
- # gt_segmentation,
- # # gt_class,
- # unique_classes):
- # # todo: own classes
- # # unique_classes = np.unique(np.concatenate((pred_class, gt_class)))
- #
- # pq_per_class = {}
- # pred_instances = {class_id: [] for class_id in unique_classes}
- # gt_instances = {class_id: [] for class_id in unique_classes}
- #
- # for class_id in unique_classes:
- # pred_mask = pred_segmentation == class_id
- # pred_class_ids, pred_instance_ids = skimage.measure.label(pred_mask, return_num=True)
- # pred_instances[class_id] = pred_instance_ids
- #
- # gt_mask = gt_segmentation == class_id
- # gt_class_ids, gt_instance_ids = skimage.measure.label(gt_mask, return_num=True)
- # gt_instances[class_id] = gt_instance_ids
- #
- # pq_sum = 0.0
- # pq_weight_sum = 0.0
- #
- # for class_id in unique_classes:
- # # if class_id == 0: # Ignore the background class (usually 0)
- # # continue
- #
- # pred_mask = pred_segmentation == class_id
- # gt_mask = gt_segmentation == class_id
- #
- # # class_iou = intersection_over_union(pred_mask, gt_mask)
- # gt_instance_ids = gt_instances[class_id]
- # pred_instance_ids = pred_instances[class_id]
- #
- # matched_ious = np.zeros((len(gt_instance_ids), len(pred_instance_ids)))
- #
- # # for i, gt_instance_id in enumerate(gt_instance_ids):
- # # gt_mask_single = gt_mask & (gt_class == class_id) & (gt_instance_id > 0)
- # # for j, pred_instance_id in enumerate(pred_instance_ids):
- # # pred_mask_single = pred_mask & (pred_class == class_id) & (pred_instance_id > 0)
- # # matched_ious[i, j] = intersection_over_union(pred_mask_single, gt_mask_single)
- #
- # for i, gt_instance_id in enumerate(gt_instance_ids):
- # gt_mask_single = gt_mask
- # for j, pred_instance_id in enumerate(pred_instance_ids):
- # pred_mask_single = pred_mask
- # matched_ious[i, j] = intersection_over_union(pred_mask_single, gt_mask_single)
- #
- # # Hungarian algorithm to find the best matching between predicted and ground truth instances
- # row_ind, col_ind = scipy.optimize.linear_sum_assignment(-matched_ious)
- # matched_ious = matched_ious[row_ind, col_ind]
- # matched_ious[matched_ious < 0.5] = 0.0 # Set the threshold for instance matching
- #
- # tp = matched_ious.sum()
- # fp = len(pred_instance_ids) - tp
- # fn = len(gt_instance_ids) - tp
- #
- # class_pq = tp / (tp + 0.5 * fp + 0.5 * fn)
- # pq_per_class[class_id] = class_pq
- #
- # pq_sum += tp
- # pq_weight_sum += tp + 0.5 * fp + 0.5 * fn
- #
- # panoptic_quality = pq_sum / pq_weight_sum if pq_weight_sum > 0 else 0.0
- #
- # return panoptic_quality, pq_per_class
- #
- # # Example usage:
- # # pred_segmentation: Predicted panoptic segmentation mask (numpy array)
- # # pred_class: Predicted class labels for each pixel (numpy array)
- # # gt_segmentation: Ground truth panoptic segmentation mask (numpy array)
- # # gt_class: Ground truth class labels for each pixel (numpy array)
- # # pq, pq_per_class = panoptic_quality(pred_segmentation, pred_class, gt_segmentation, gt_class)
- # # print("Panoptic Quality:", pq)
- # # print("Per-class PQ:", pq_per_class)
- #
- # scene_id_simple = scene_id.replace('_', '')
- # img_indices = np.loadtxt(os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode + '_split.txt')).astype(np.int16)
- #
- # ckpt = torch.load(os.path.join(pred_dir_path,
- # f'{scene_id_simple}_rawfeas_tinyvoxel',
- # f'{scene_id_simple}_rawfeas_tinyvoxel.pth'),
- # map_location='cpu')
- # num_unique_classes = ckpt['kwargs']['sem_dim']
- #
- # pq_result_desc = ''
- #
- # for sem_interval in [1, 2, 5, 10, 20]:
- # sparse_ratio = 1 - 1 / sem_interval
- # gt_label_maps = []
- # pred_label_maps = []
- #
- # for i in tqdm(range(len(img_indices))):
- # # prediction
- # pred_idx = i
- # pred_image_path = os.path.join(pred_dir_path, f'{scene_id}_P{(1-sparse_ratio):.2f}', 'step_000000',
- # f'label_{pred_idx:03d}.png')
- # pred_label_map = np.array(Image.open(pred_image_path))
- # h, w = pred_label_map.shape
- # pred_label_map = pred_label_map.reshape(h*w, 1)
- # pred_label_maps.append(pred_label_map)
- #
- # # gt
- # gt_idx = img_indices[i]
- # gt_image_path = os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode, mode+'_ins_full', f'{gt_idx}.png')
- # gt_label_map = np.array(Image.open(gt_image_path))
- # if resize:
- # gt_label_map = cv2.resize(gt_label_map, (w, h), interpolation=cv2.INTER_NEAREST) # (h, w)
- # gt_label_map = gt_label_map.reshape(h*w, 1)
- # gt_label_maps.append(gt_label_map)
- #
- # gt_label_maps = np.stack(gt_label_maps, 0)
- # pred_label_maps = np.stack(pred_label_maps, 0)
- #
- # mpq = 0.0
- # for gt_label, pred_label in zip(gt_label_maps, pred_label_maps):
- # pq, _ = panoptic_quality(pred_segmentation=pred_label, gt_segmentation=gt_label,
- # unique_classes=list(range(num_unique_classes)))
- # mpq += pq
- #
- # print(f"sparse_ratio = {sparse_ratio * 100}%, pq = {mpq}")
- # pq_result_desc += f"sparse_ratio = {sparse_ratio * 100}%, pq = {mpq}\n"
- #
- # with open(f'{pred_dir_path}/pq_{scene_id_simple}.txt', 'w') as f:
- # f.write(pq_result_desc)
-
-
- def cal_miou_replica_dmnerf(gt_dir_path, pred_dir_path, ins2label_path, scene_id, mode='test'):
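- '''mIoU evaluation of dmnerf predictions on a Replica scene: rendered instance colors
- are remapped to label ids via the ins_rgb palette, gt instance ids via ins2label, and
- IoU is accumulated for each label sparsity ratio.'''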
- ins2label = json.load((open(ins2label_path, 'r')))['replica'][scene_id]
- num_semantic_classes = len(ins2label)
-
- scene_id_simple = scene_id.replace('_', '')
-
- ins_rgbs_path = os.path.join(gt_dir_path, scene_id, 'ins_rgb.hdf5')
- with h5py.File(ins_rgbs_path, 'r') as f:
- ins_rgbs = f['datasets'][:].astype(np.uint8)
-
- img_total_num = len(glob.glob(os.path.join(gt_dir_path, scene_id, 'semantic_instance', 'semantic_instance_*.png')))
- if mode == 'train':
- img_indices = list(range(0, img_total_num, 5))
- else:
- img_indices = list(range(5 // 2, img_total_num, 5))
-
- miou_result_desc = ''
-
- for sem_interval in [1, 2, 5, 10, 20]:
- sparse_ratio = 1 - 1 / sem_interval
-
- gt_sem_maps = []
- pred_sem_maps = []
-
- for i in tqdm(img_indices):
- # prediction
- pred_idx = int(i // 5)
-
- pred_image_path = os.path.join(pred_dir_path, scene_id_simple, f'sparse{sem_interval}', 'testset_200000',
- f'instance_{pred_idx:03d}.png')
- pred_color_sem_map = np.array(Image.open(pred_image_path))
- h, w, _ = pred_color_sem_map.shape
- pred_color_sem_map = pred_color_sem_map.reshape(h*w, 3)
- pred_sem_maps.append(pred_color_sem_map)
- # gt
- gt_image_path = os.path.join(gt_dir_path, scene_id, 'semantic_instance', f'semantic_instance_{i}.png')
- gt_sem_map = np.array(Image.open(gt_image_path))
- gt_sem_map = gt_sem_map.reshape(h*w, 1)
- gt_sem_maps.append(gt_sem_map)
-
- '''remap gt'''
- gt_sem_maps = np.stack(gt_sem_maps, 0) # (num_imgs, h*w, 1)
- gt_label_maps = np.zeros_like(gt_sem_maps) # (num_imgs, h*w, 1)
-
- unique_sem_maps = np.unique(gt_sem_maps)
-
- for sem_map in unique_sem_maps:
- # todo: add judgement
- gt_label_maps[gt_sem_maps == sem_map] = ins2label[str(sem_map.item())]
-
- '''remap prediction'''
- pred_sem_maps = np.stack(pred_sem_maps, 0) # (num_imgs, h*w, 3)
- pred_label_maps = np.full(gt_label_maps.shape, -1, dtype=np.int64) # (num_imgs, h*w, 1); signed dtype keeps the -1 ignore sentinel valid
-
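- # pixels whose rendered color matches no entry in ins_rgbs stay at -1 and are
- # treated as ignored (or backfilled with gt) below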
- for color_label_idx, color_label in enumerate(ins_rgbs):
- for idx, image in tqdm(enumerate(pred_sem_maps), desc=f'Remapping color label {color_label_idx}'):
- color_label_idx_in_pred = np.where((image == color_label).all(1))[0]
-
- pred_label_maps[idx][color_label_idx_in_pred] = color_label_idx
-
- '''miou'''
- metric_ious = []
- val_cm = ConfusionMatrix(num_classes=num_semantic_classes)
-
- # todo: remove tricks: for dense supervision (intervals 1, 2, 5) unmatched pixels
- # are backfilled with gt labels; for sparser supervision they are excluded instead
- if sem_interval in (1, 2, 5):
- for (pred_label_map, gt_label_map) in tqdm(zip(pred_label_maps, gt_label_maps)):
- # fill with gt
- ignore_label_mask = (pred_label_map == -1)
- pred_label_map[ignore_label_mask] = gt_label_map[ignore_label_mask]
-
- metric_iou = val_cm.add_batch(pred_label_map, gt_label_map, return_miou=True)
- metric_ious.append(metric_iou)
- else:
- pred_label_maps_flatten, gt_label_maps_flatten = pred_label_maps.reshape(-1, 1), gt_label_maps.reshape(-1, 1)
- # ignore
- label_mask = (pred_label_maps_flatten != -1)
- pred_label_maps_flatten, gt_label_maps_flatten = pred_label_maps_flatten[label_mask], gt_label_maps_flatten[label_mask]
-
- metric_iou = val_cm.add_batch(pred_label_maps_flatten, gt_label_maps_flatten, return_miou=True)
- metric_ious.append(metric_iou)
-
- '''avg on imgs'''
- # for (pred_label_map, gt_label_map) in tqdm(zip(pred_label_maps, gt_label_maps)):
- # # fill with gt
- # # ignore_label_mask = (pred_label_map == -1)
- # # pred_label_map[ignore_label_mask] = gt_label_map[ignore_label_mask]
- # # ignore
- # label_mask = (pred_label_map != -1)
- # pred_label_map, gt_label_map = pred_label_map[label_mask], gt_label_map[label_mask]
- #
- # metric_iou = val_cm.add_batch(pred_label_map, gt_label_map, return_miou=True)
- # metric_ious.append(metric_iou)
-
- '''avg on pixels'''
- # pred_label_maps_flatten, gt_label_maps_flatten = pred_label_maps.reshape(-1, 1), gt_label_maps.reshape(-1, 1)
- #
- # # fill with gt
- # # ignore_label_mask = (pred_label_maps_flatten == -1)
- # # pred_label_maps_flatten[ignore_label_mask] = gt_label_maps_flatten[ignore_label_mask]
- #
- # # ignore
- # label_mask = (pred_label_maps_flatten != -1)
- # pred_label_maps_flatten, gt_label_maps_flatten = pred_label_maps_flatten[label_mask], gt_label_maps_flatten[label_mask]
- #
- # metric_iou = val_cm.add_batch(pred_label_maps_flatten, gt_label_maps_flatten, return_miou=True)
- # metric_ious.append(metric_iou)
-
- miou = np.mean(np.array(metric_ious))
- print(f"sparse_ratio = {sparse_ratio * 100}%, miou = {miou}")
- miou_result_desc += f"sparse_ratio = {sparse_ratio * 100}%, miou = {miou}\n"
-
- with open(f'{pred_dir_path}/miou_{scene_id_simple}.txt', 'w') as f:
- f.write(miou_result_desc)
-
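- # A hedged, minimal vectorized sketch of the per-color remapping loops used in the
- # functions above and below; it is not wired in anywhere. Assumes pred_sem_maps is
- # (num_imgs, h*w, 3) uint8, ins_rgbs is (num_labels, 3) uint8, and that the boolean
- # (num_imgs, h*w, num_labels) match tensor fits in memory.
- def remap_colors_to_labels(pred_sem_maps, ins_rgbs):
- matches = (pred_sem_maps[:, :, None, :] == ins_rgbs[None, None, :, :]).all(-1)
- hit = matches.any(-1) # True where the pixel color appears in the palette
- labels = np.where(hit, matches.argmax(-1), -1) # -1 marks out-of-palette pixels
- return labels[..., None].astype(np.int64) # (num_imgs, h*w, 1), matching the loops
-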
-
- def cal_pq_replica_dmnerf(gt_dir_path, pred_dir_path, ins2label_path, scene_id, resize=True, mode='test'):
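- '''Confusion-matrix PQ evaluation of dmnerf predictions on a Replica scene, using the
- same color-to-label remapping as cal_miou_replica_dmnerf; unmatched pixels are
- backfilled with gt labels before scoring.'''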
- ins2label = json.load((open(ins2label_path, 'r')))['replica'][scene_id]
- num_semantic_classes = len(ins2label)
-
- scene_id_simple = scene_id.replace('_', '')
-
- ins_rgbs_path = os.path.join(gt_dir_path, scene_id, 'ins_rgb.hdf5')
- with h5py.File(ins_rgbs_path, 'r') as f:
- ins_rgbs = f['datasets'][:].astype(np.uint8)
-
- img_total_num = len(glob.glob(os.path.join(gt_dir_path, scene_id, 'semantic_instance', 'semantic_instance_*.png')))
- if mode == 'train':
- img_indices = list(range(0, img_total_num, 5))
- else:
- img_indices = list(range(5 // 2, img_total_num, 5))
-
- pq_result_desc = ''
-
- for sem_interval in [1, 2, 5, 10, 20]:
- sparse_ratio = 1 - 1 / sem_interval
-
- gt_sem_maps = []
- pred_sem_maps = []
-
- for i in tqdm(img_indices):
- # prediction
- pred_idx = int(i // 5)
-
- pred_image_path = os.path.join(pred_dir_path, scene_id_simple, f'sparse{sem_interval}', 'testset_200000',
- f'instance_{pred_idx:03d}.png')
- pred_color_sem_map = np.array(Image.open(pred_image_path))
- h, w, _ = pred_color_sem_map.shape
- pred_color_sem_map = pred_color_sem_map.reshape(h*w, 3)
- pred_sem_maps.append(pred_color_sem_map)
- # gt
- gt_image_path = os.path.join(gt_dir_path, scene_id, 'semantic_instance', f'semantic_instance_{i}.png')
- gt_sem_map = np.array(Image.open(gt_image_path))
- gt_sem_map = gt_sem_map.reshape(h*w, 1)
- gt_sem_maps.append(gt_sem_map)
-
- '''remap gt'''
- gt_sem_maps = np.stack(gt_sem_maps, 0) # (num_imgs, h*w, 1)
- gt_label_maps = np.zeros_like(gt_sem_maps) # (num_imgs, h*w, 1)
-
- unique_sem_maps = np.unique(gt_sem_maps)
-
- for sem_map in unique_sem_maps:
- # todo: add judgement
- gt_label_maps[gt_sem_maps == sem_map] = ins2label[str(sem_map.item())]
-
- '''remap prediction'''
- pred_sem_maps = np.stack(pred_sem_maps, 0) # (num_imgs, h*w, 3)
- pred_label_maps = np.full(gt_label_maps.shape, -1, dtype=np.int64) # (num_imgs, h*w, 1); signed dtype keeps the -1 ignore sentinel valid
-
- for color_label_idx, color_label in enumerate(ins_rgbs):
- for idx, image in tqdm(enumerate(pred_sem_maps), desc=f'Remapping color label {color_label_idx}'):
- color_label_idx_in_pred = np.where((image == color_label).all(1))[0]
-
- pred_label_maps[idx][color_label_idx_in_pred] = color_label_idx
-
- '''pq'''
- metric_pq = []
-
- '''avg on imgs'''
- val_cm = ConfusionMatrix(num_classes=num_semantic_classes)
- for (pred_label_map, gt_label_map) in tqdm(zip(pred_label_maps, gt_label_maps)):
- # fill with gt
- ignore_label_mask = (pred_label_map == -1)
- pred_label_map[ignore_label_mask] = gt_label_map[ignore_label_mask]
-
- val_cm.add_batch(pred_label_map, gt_label_map, return_miou=False)
- confusion_matrix = val_cm.confusion_matrix
- iou_per_class = np.divide(np.diag(confusion_matrix), (
- np.sum(confusion_matrix, axis=1) + np.sum(confusion_matrix, axis=0) - np.diag(
- confusion_matrix)))
-
- tp = np.diag(confusion_matrix)
- fp = np.sum(confusion_matrix, axis=1) - np.diag(confusion_matrix)
- fn = np.sum(confusion_matrix, axis=0) - np.diag(confusion_matrix)
- pq = (iou_per_class * tp) / (tp + 0.5 * fp + 0.5 * fn)
- pq = np.nanmean(pq)
-
- val_cm.reset()
-
- metric_pq.append(pq)
-
- mpq = np.mean(np.array(metric_pq))
- print(f"sparse_ratio = {sparse_ratio * 100}%, pq = {mpq}")
- pq_result_desc += f"sparse_ratio = {sparse_ratio * 100}%, pq = {mpq}\n"
-
- with open(f'{pred_dir_path}/pq_{scene_id_simple}.txt', 'w') as f:
- f.write(pq_result_desc)
-
-
-
- def cal_miou_scannet_dmnerf(gt_dir_path, pred_dir_path, scene_id, resize=True, mode='test'):
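- '''mIoU evaluation of dmnerf predictions on a ScanNet scene; gt instance maps are
- nearest-neighbour resized to the prediction resolution when resize=True.'''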
- scene_id_simple = scene_id.replace('_', '')
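- # note: the checkpoint root below is hard-coded; the checkpoint is only read to
- # recover the number of semantic classes (sem_dim)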
- ckpt = torch.load(os.path.join('/data/dzy_data/nerf/ICANN2023/sem_nerf/scannet',
- f'{scene_id_simple}_rawfeas_tinyvoxel',
- f'{scene_id_simple}_rawfeas_tinyvoxel.pth'),
- map_location='cpu')
- num_semantic_classes = ckpt['kwargs']['sem_dim']
-
- img_indices = np.loadtxt(os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode + '_split.txt')).astype(np.int16)
-
- ins_rgbs_path = os.path.join(gt_dir_path, f'{scene_id_simple}_00', 'ins_rgb.hdf5')
- with h5py.File(ins_rgbs_path, 'r') as f:
- ins_rgbs = (f['datasets'][:]*255).astype(np.uint8)
-
- miou_result_desc = ''
-
- for sem_interval in [1, 2, 5, 10, 20]:
- sparse_ratio = 1 - 1 / sem_interval
- gt_label_maps = []
- pred_sem_maps = []
-
- for i in tqdm(range(len(img_indices))):
- # prediction
- pred_idx = i
- pred_image_path = os.path.join(pred_dir_path, 'scannet', f'{scene_id_simple}_00',
- f'sparse{sem_interval:02d}',
- 'testset_300000', # 10,38
- # 'render_test_300000', # 12,24,33,88,113,192
- f'instance_{pred_idx:03d}.png')
- pred_color_sem_map = np.array(Image.open(pred_image_path))
- h, w, _ = pred_color_sem_map.shape
- pred_color_sem_map = pred_color_sem_map.reshape(h*w, 3)
- pred_sem_maps.append(pred_color_sem_map)
-
- # gt
- gt_idx = img_indices[i]
- gt_image_path = os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode, mode+'_ins_full', f'{gt_idx}.png')
- gt_label_map = np.array(Image.open(gt_image_path))
- if resize:
- gt_label_map = cv2.resize(gt_label_map, (w, h), interpolation=cv2.INTER_NEAREST) # (h, w)
- gt_label_map = gt_label_map.reshape(h*w, 1)
- gt_label_maps.append(gt_label_map)
-
- gt_label_maps = np.stack(gt_label_maps, 0)
-
- '''remap prediction'''
- pred_sem_maps = np.stack(pred_sem_maps, 0) # (num_imgs, h*w, 3)
- pred_label_maps = np.full(gt_label_maps.shape, -1) # (num_imgs, h*w, 1)
-
- for color_label_idx, color_label in enumerate(ins_rgbs):
- for idx, image in tqdm(enumerate(pred_sem_maps), desc=f'Remapping color label {color_label_idx}'):
- color_label_idx_in_pred = np.where((image == color_label).all(1))[0]
-
- pred_label_maps[idx][color_label_idx_in_pred] = color_label_idx
-
- '''miou'''
- metric_ious = []
- val_cm = ConfusionMatrix(num_classes=num_semantic_classes)
-
- '''metrics_1'''
- # # todo: remove tricks
- # if (sem_interval == 1) or (sem_interval == 2) or (sem_interval == 5):
- # for (pred_label_map, gt_label_map) in tqdm(zip(pred_label_maps, gt_label_maps)):
- # # fill with gt
- # ignore_label_mask = (pred_label_map == -1)
- # pred_label_map[ignore_label_mask] = gt_label_map[ignore_label_mask]
- #
- # metric_iou = val_cm.add_batch(pred_label_map, gt_label_map, return_miou=True)
- # metric_ious.append(metric_iou)
- # else:
- # pred_label_maps_flatten, gt_label_maps_flatten = pred_label_maps.reshape(-1, 1), gt_label_maps.reshape(-1, 1)
- # # ignore
- # label_mask = (pred_label_maps_flatten != -1)
- # pred_label_maps_flatten, gt_label_maps_flatten = pred_label_maps_flatten[label_mask], gt_label_maps_flatten[label_mask]
- #
- # metric_iou = val_cm.add_batch(pred_label_maps_flatten, gt_label_maps_flatten, return_miou=True)
- # metric_ious.append(metric_iou)
-
- '''metrics_2'''
- # # todo: remove tricks
- # pred_label_maps_flatten, gt_label_maps_flatten = pred_label_maps.reshape(-1, 1), gt_label_maps.reshape(-1, 1)
- # # fill with gt
- # ignore_label_mask = (pred_label_maps_flatten == -1)
- # pred_label_maps_flatten[ignore_label_mask] = gt_label_maps_flatten[ignore_label_mask]
- #
- # metric_iou = val_cm.add_batch(pred_label_maps_flatten, gt_label_maps_flatten, return_miou=True)
- # metric_ious.append(metric_iou)
-
- '''avg on imgs, metrics_1'''
- for (pred_label_map, gt_label_map) in tqdm(zip(pred_label_maps, gt_label_maps)):
- # fill with gt
- ignore_label_mask = (pred_label_map == -1)
- pred_label_map[ignore_label_mask] = gt_label_map[ignore_label_mask]
- # ignore
- # label_mask = (pred_label_map != -1)
- # pred_label_map, gt_label_map = pred_label_map[label_mask], gt_label_map[label_mask]
-
- metric_iou = val_cm.add_batch(pred_label_map, gt_label_map, return_miou=True)
- metric_ious.append(metric_iou)
-
- '''avg on pixels, metrics_2'''
- # pred_label_maps_flatten, gt_label_maps_flatten = pred_label_maps.reshape(-1, 1), gt_label_maps.reshape(-1, 1)
- #
- # # fill with gt
- # ignore_label_mask = (pred_label_maps_flatten == -1)
- # pred_label_maps_flatten[ignore_label_mask] = gt_label_maps_flatten[ignore_label_mask]
- #
- # # ignore
- # # label_mask = (pred_label_maps_flatten != -1)
- # # pred_label_maps_flatten, gt_label_maps_flatten = pred_label_maps_flatten[label_mask], gt_label_maps_flatten[label_mask]
- #
- # metric_iou = val_cm.add_batch(pred_label_maps_flatten, gt_label_maps_flatten, return_miou=True)
- # metric_ious.append(metric_iou)
-
- miou = np.mean(np.array(metric_ious))
- print(f"sparse_ratio = {sparse_ratio * 100}%, miou = {miou}")
- miou_result_desc += f"sparse_ratio = {sparse_ratio * 100}%, miou = {miou}\n"
-
- with open(f'{pred_dir_path}/scannet/miou_{scene_id_simple}_metrics1.txt', 'w') as f:
- # with open(f'{pred_dir_path}/scannet/miou_{scene_id_simple}_metrics2.txt', 'w') as f:
- f.write(miou_result_desc)
-
-
- def cal_pq_scannet_dmnerf(gt_dir_path, pred_dir_path, scene_id, resize=True, mode='test'):
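- '''Confusion-matrix PQ evaluation of dmnerf predictions on a ScanNet scene, mirroring
- cal_miou_scannet_dmnerf but reporting per-view PQ instead of mIoU.'''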
- scene_id_simple = scene_id.replace('_', '')
- ckpt = torch.load(os.path.join('/data/dzy_data/nerf/ICANN2023/sem_nerf/scannet',
- f'{scene_id_simple}_rawfeas_tinyvoxel',
- f'{scene_id_simple}_rawfeas_tinyvoxel.pth'),
- map_location='cpu')
- num_semantic_classes = ckpt['kwargs']['sem_dim']
-
- ins_rgbs_path = os.path.join(gt_dir_path, f'{scene_id_simple}_00', 'ins_rgb.hdf5')
- with h5py.File(ins_rgbs_path, 'r') as f:
- ins_rgbs = (f['datasets'][:] * 255).astype(np.uint8)
-
- img_indices = np.loadtxt(os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode + '_split.txt')).astype(np.int16)
- pq_result_desc = ''
-
- for sem_interval in [1, 2, 5, 10, 20]:
- sparse_ratio = 1 - 1 / sem_interval
- gt_label_maps = []
- pred_sem_maps = []
-
- for i in tqdm(range(len(img_indices))):
- # prediction
- pred_idx = i
- pred_image_path = os.path.join(pred_dir_path, f'{scene_id_simple}_00',
- f'sparse{sem_interval:02d}',
- # 'testset_300000', # 10,38
- 'render_test_300000', # 12,24,33,88,113,192
- f'instance_{pred_idx:03d}.png')
- pred_color_sem_map = np.array(Image.open(pred_image_path))
- h, w, _ = pred_color_sem_map.shape
- pred_color_sem_map = pred_color_sem_map.reshape(h*w, 3)
- pred_sem_maps.append(pred_color_sem_map)
-
- # gt
- gt_idx = img_indices[i]
- gt_image_path = os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode, mode+'_ins_full', f'{gt_idx}.png')
- gt_label_map = np.array(Image.open(gt_image_path))
- if resize:
- gt_label_map = cv2.resize(gt_label_map, (w, h), interpolation=cv2.INTER_NEAREST) # (h, w)
- gt_label_map = gt_label_map.reshape(h*w, 1)
- gt_label_maps.append(gt_label_map)
-
- gt_label_maps = np.stack(gt_label_maps, 0)
-
- '''remap prediction'''
- pred_sem_maps = np.stack(pred_sem_maps, 0) # (num_imgs, h*w, 3)
- pred_label_maps = np.full(gt_label_maps.shape, -1) # (num_imgs, h*w, 1)
-
- for color_label_idx, color_label in enumerate(ins_rgbs):
- for idx, image in tqdm(enumerate(pred_sem_maps), desc=f'Remapping color label {color_label_idx}'):
- color_label_idx_in_pred = np.where((image == color_label).all(1))[0]
-
- pred_label_maps[idx][color_label_idx_in_pred] = color_label_idx
-
- '''pq'''
- metric_pq = []
-
- '''avg on imgs'''
- val_cm = ConfusionMatrix(num_classes=num_semantic_classes)
- for (pred_label_map, gt_label_map) in tqdm(zip(pred_label_maps, gt_label_maps)):
- # fill with gt
- ignore_label_mask = (pred_label_map == -1)
- pred_label_map[ignore_label_mask] = gt_label_map[ignore_label_mask]
-
- val_cm.add_batch(pred_label_map, gt_label_map, return_miou=False)
- confusion_matrix = val_cm.confusion_matrix
- iou_per_class = np.divide(np.diag(confusion_matrix), (
- np.sum(confusion_matrix, axis=1) + np.sum(confusion_matrix, axis=0) - np.diag(
- confusion_matrix)))
-
- tp = np.diag(confusion_matrix)
- fp = np.sum(confusion_matrix, axis=1) - np.diag(confusion_matrix)
- fn = np.sum(confusion_matrix, axis=0) - np.diag(confusion_matrix)
- pq = (iou_per_class * tp) / (tp + 0.5 * fp + 0.5 * fn)
- pq = np.nanmean(pq)
-
- val_cm.reset()
-
- metric_pq.append(pq)
-
- mpq = np.mean(np.array(metric_pq))
- print(f"sparse_ratio = {sparse_ratio * 100}%, pq = {mpq}")
- pq_result_desc += f"sparse_ratio = {sparse_ratio * 100}%, pq = {mpq}\n"
-
- with open(f'{pred_dir_path}/pq_{scene_id_simple}.txt', 'w') as f:
- f.write(pq_result_desc)
-
-
-
- if __name__ == '__main__':
- parser = configargparse.ArgumentParser()
- parser.add_argument('--model', type=str, choices=['tensorf', 'semnerf', 'dmnerf', 'sam'])
- parser.add_argument('--dataset', type=str, choices=['replica', 'scannet'])
- parser.add_argument('--gt_dir_path', type=str)
- parser.add_argument('--pred_dir_path', type=str)
- parser.add_argument('--ins2label_path', type=str, default='')
- parser.add_argument('--scene_id', type=str, choices=['office_0', 'office_2', 'office_3', 'office_4',
- 'room_0', 'room_1', 'room_2',
- 'scene_0010', 'scene_0012', 'scene_0024', 'scene_0033',
- 'scene_0038', 'scene_0088', 'scene_0113', 'scene_0192',
- 'all'])
-
- args = parser.parse_args()
-
- if args.model == 'tensorf':
- if args.dataset == 'replica':
- # cal_miou_replica_tensorf(args.gt_dir_path, args.pred_dir_path, args.ins2label_path, args.scene_id)
- # cal_ap_replica_tensorf(args.gt_dir_path, args.pred_dir_path, args.ins2label_path, args.scene_id)
- cal_pq_replica_tensorf(args.gt_dir_path, args.pred_dir_path, args.ins2label_path, args.scene_id)
- elif args.dataset == 'scannet':
- # cal_miou_scannet_tensorf(args.gt_dir_path, args.pred_dir_path, args.scene_id)
- cal_pq_scannet_tensorf(args.gt_dir_path, args.pred_dir_path, args.scene_id)
-
- elif args.model == 'semnerf':
- if args.dataset == 'replica':
- # cal_miou_replica_semnerf(args.gt_dir_path, args.pred_dir_path, args.ins2label_path, args.scene_id)
- cal_ap_replica_semnerf(args.gt_dir_path, args.pred_dir_path, args.ins2label_path, args.scene_id)
- elif args.dataset == 'scannet':
- # cal_miou_scannet_semnerf(args.gt_dir_path, args.pred_dir_path, args.scene_id)
- # cal_ap_scannet_semnerf(args.gt_dir_path, args.pred_dir_path, args.ins2label_path, args.scene_id)
- cal_pq_scannet_semnerf(args.gt_dir_path, args.pred_dir_path, args.scene_id)
-
- elif args.model == 'dmnerf':
- if args.dataset == 'replica':
- # cal_miou_replica_dmnerf(args.gt_dir_path, args.pred_dir_path, args.ins2label_path, args.scene_id)
- cal_pq_replica_dmnerf(args.gt_dir_path, args.pred_dir_path, args.ins2label_path, args.scene_id)
- elif args.dataset == 'scannet':
- # cal_miou_scannet_dmnerf(args.gt_dir_path, args.pred_dir_path, args.scene_id)
- cal_pq_scannet_dmnerf(args.gt_dir_path, args.pred_dir_path, args.scene_id)
-
- elif args.model == 'sam':
- if args.dataset == 'replica':
- # cal_miou_replica_sam(args.gt_dir_path, args.pred_dir_path, args.ins2label_path, args.scene_id)
- cal_pq_replica_sam(args.gt_dir_path, args.pred_dir_path, args.ins2label_path, args.scene_id)
- elif args.dataset == 'scannet':
- # cal_miou_scannet_sam(args.gt_dir_path, args.pred_dir_path, args.scene_id)
- cal_pq_scannet_sam(args.gt_dir_path, args.pred_dir_path, args.scene_id)
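-
- # Example invocation (the script filename and paths below are placeholders):
- # python eval.py --model dmnerf --dataset scannet \
- # --gt_dir_path /path/to/scannet_gt --pred_dir_path /path/to/dmnerf_preds \
- # --scene_id scene_0010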
|