|
- import glob
- import os
-
- import configargparse
- import cv2
- import json
- import h5py
- from tqdm import tqdm
- import torch
- import torch.nn.functional as F
-
- from PIL import Image
- import numpy as np
-
- from utils.metrics import ConfusionMatrix, calculate_ap
-
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-
def cal_miou_replica_tensorf(gt_dir_path, pred_dir_path, ins2label_path, scene_id, mode='test'):
    """Compute semantic mIoU on a Replica scene for TensoRF color-map renders.

    Predictions are color-coded semantic PNGs; they are mapped back to label
    indices via the per-scene instance color palette (``ins_rgb.hdf5``), GT
    instance ids are mapped to label indices via the ``ins2label`` JSON, and
    a confusion matrix over all pixels of all evaluated frames yields mIoU.

    Args:
        gt_dir_path: GT root; must contain ``<scene_id>/ins_rgb.hdf5`` and
            ``<scene_id>/semantic_instance/semantic_instance_<i>.png``.
        pred_dir_path: prediction root; renders are read from
            ``<scene_id>/ablate_SDF/.../imgs_vis/sem/049999_<idx>.png``.
        ins2label_path: JSON whose ``['replica'][scene_id]`` entry maps
            instance id (as string) -> label index.
        scene_id: Replica scene name, e.g. ``office_0``.
        mode: ``'train'`` evaluates frames 0, 5, 10, ...; anything else
            ('test') evaluates frames 2, 7, 12, ... (same stride of 5).

    Side effects:
        Prints the per-sparsity mIoU and writes it to
        ``<pred_dir_path>/miou_<scene_id>_49999.txt``.
    """
    # Use a context manager so the JSON handle is closed (original leaked it).
    with open(ins2label_path, 'r') as f:
        ins2label = json.load(f)['replica'][scene_id]
    num_sem_classes = len(ins2label)

    scene_id_simple = scene_id.replace('_', '')

    # Row k of ins_rgbs is the RGB color used to render semantic label k.
    ins_rgbs_path = os.path.join(gt_dir_path, scene_id, 'ins_rgb.hdf5')
    with h5py.File(ins_rgbs_path, 'r') as f:
        ins_rgbs = f['datasets'][:].astype(np.uint8)

    img_total_num = len(glob.glob(os.path.join(gt_dir_path, scene_id, 'semantic_instance', 'semantic_instance_*.png')))
    if mode == 'train':
        img_indices = list(range(0, img_total_num, 5))
    else:
        # Test split: every 5th frame starting at 5 // 2 == 2.
        img_indices = list(range(5 // 2, img_total_num, 5))

    miou_result_desc = ''

    # sem_interval is the training-time semantic supervision stride; only the
    # sparsest setting (interval 20 -> 95% sparse) is evaluated here.
    for sem_interval in [20]:
        sparse_ratio = 1 - 1 / sem_interval

        gt_sem_maps = []
        pred_sem_maps = []
        for i in tqdm(img_indices):
            # Frame i of the full sequence corresponds to render index i // 5.
            pred_idx = int(i // 5)
            pred_image_path = os.path.join(pred_dir_path, scene_id,
                                           'ablate_SDF', f'{scene_id_simple}_interval{sem_interval}_insPE',
                                           'imgs_vis', 'sem',
                                           f'049999_{pred_idx:03d}.png')
            pred_color_sem_map = np.array(Image.open(pred_image_path))
            h, w, _ = pred_color_sem_map.shape
            pred_color_sem_map = pred_color_sem_map.reshape(h * w, 3)
            pred_sem_maps.append(pred_color_sem_map)
            # GT instance-id map, flattened to match the prediction layout.
            gt_image_path = os.path.join(gt_dir_path, scene_id, 'semantic_instance', f'semantic_instance_{i}.png')
            gt_sem_map = np.array(Image.open(gt_image_path))
            gt_sem_map = gt_sem_map.reshape(h * w, 1)
            gt_sem_maps.append(gt_sem_map)

        # --- remap GT instance ids to contiguous label indices ---
        gt_sem_maps = np.stack(gt_sem_maps, 0)        # (num_imgs, h*w, 1)
        gt_label_maps = np.zeros_like(gt_sem_maps)    # (num_imgs, h*w, 1)

        for sem_map in np.unique(gt_sem_maps):
            # NOTE(review): assumes every GT id occurs in ins2label; a missing
            # key raises KeyError — confirm against the palette JSON.
            gt_label_maps[gt_sem_maps == sem_map] = ins2label[str(sem_map.item())]

        # --- remap predicted colors to label indices via the palette ---
        pred_sem_maps = np.stack(pred_sem_maps, 0)      # (num_imgs, h*w, 3)
        pred_label_maps = np.zeros_like(gt_label_maps)  # (num_imgs, h*w, 1)

        for color_label_idx, color_label in enumerate(ins_rgbs):
            for idx, image in tqdm(enumerate(pred_sem_maps), desc=f'Remapping {color_label_idx}th color label'):
                color_label_idx_in_pred = np.where((image == color_label).all(1))[0]
                pred_label_maps[idx][color_label_idx_in_pred] = color_label_idx

        # --- mIoU averaged over all pixels of all frames ---
        metric_ious = []
        val_cm = ConfusionMatrix(num_classes=num_sem_classes)

        pred_label_maps_flatten, gt_label_maps_flatten = pred_label_maps.reshape(-1, 1), gt_label_maps.reshape(-1, 1)
        metric_iou = val_cm.add_batch(pred_label_maps_flatten, gt_label_maps_flatten, return_miou=True)
        metric_ious.append(metric_iou)

        miou = np.mean(np.array(metric_ious))
        print(f"sparse_ratio = {sparse_ratio * 100}%, miou = {miou}")
        miou_result_desc += f"sparse_ratio = {sparse_ratio * 100}%, miou = {miou}\n"

    with open(f'{pred_dir_path}/miou_{scene_id}_49999.txt', 'w') as f:
        f.write(miou_result_desc)
-
-
def cal_miou_replica_tensorf_ablation(gt_dir_path, pred_dir_path, ins2label_path, scene_id, mode='test'):
    """Ablation variant of the Replica mIoU evaluation for TensoRF renders.

    Unlike cal_miou_replica_tensorf, predictions here are already
    label-index PNGs (``039999_label_*.png``), so the color-palette
    remapping step is skipped (left commented out). Only the sparsest
    supervision setting (sem_interval == 20) is evaluated.

    Side effects:
        Prints the mIoU and writes it to
        ``<pred_dir_path>/miou_<scene_id>_ablation.txt``.
    """
    ins2label = json.load((open(ins2label_path, 'r')))['replica'][scene_id]
    num_semantic_classes = len(ins2label)

    scene_id_simple = scene_id.replace('_', '')

    # Palette not needed in this variant — predictions are label indices.
    # ins_rgbs_path = os.path.join(gt_dir_path, scene_id, 'ins_rgb.hdf5')
    # with h5py.File(ins_rgbs_path, 'r') as f:
    #     ins_rgbs = f['datasets'][:].astype(np.uint8)

    img_total_num = len(glob.glob(os.path.join(gt_dir_path, scene_id, 'semantic_instance', 'semantic_instance_*.png')))
    if mode == 'train':
        img_indices = list(range(0, img_total_num, 5))
    else:
        # Test split: every 5th frame starting at 5 // 2 == 2.
        img_indices = list(range(5 // 2, img_total_num, 5))

    miou_result_desc = ''

    # Sparsest supervision setting only: interval 20 -> 95% sparse.
    sem_interval = 20
    sparse_ratio = 1 - 1 / sem_interval

    gt_sem_maps = []
    pred_label_maps = []
    for i in tqdm(img_indices):
        # Frame i of the full sequence corresponds to render index i // 5.
        pred_idx = int(i // 5)
        pred_image_path = os.path.join(pred_dir_path,
                                       f'{scene_id_simple}_sem_MLPFea_complexersemmat_warmup_interval_{sem_interval}_tinyvoxel_norawfeas',
                                       'imgs_vis', 'sem', f'039999_label_{pred_idx:03d}.png')
        # Single-channel PNG of label indices (not a color render).
        pred_color_sem_map = np.array(Image.open(pred_image_path))
        h, w = pred_color_sem_map.shape
        pred_color_sem_map = pred_color_sem_map.reshape(h*w)
        pred_label_maps.append(pred_color_sem_map)
        # gt
        gt_image_path = os.path.join(gt_dir_path, scene_id, 'semantic_instance', f'semantic_instance_{i}.png')
        gt_sem_map = np.array(Image.open(gt_image_path))
        gt_sem_map = gt_sem_map.reshape(h*w, 1)
        gt_sem_maps.append(gt_sem_map)

    '''remap gt'''
    gt_sem_maps = np.stack(gt_sem_maps, 0)  # (num_imgs, h*w, 1)
    gt_label_maps = np.zeros_like(gt_sem_maps)  # (num_imgs, h*w, 1)

    unique_sem_maps = np.unique(gt_sem_maps)

    for sem_map in unique_sem_maps:
        # todo: add judgement
        # NOTE(review): a GT id missing from ins2label raises KeyError.
        gt_label_maps[gt_sem_maps == sem_map] = ins2label[str(sem_map.item())]

    '''remap prediction'''
    pred_label_maps = np.stack(pred_label_maps, 0)  # (num_imgs, h*w)

    # Color remapping not needed — predictions are already label indices.
    # for color_label_idx, color_label in enumerate(ins_rgbs):
    #     for idx, image in tqdm(enumerate(pred_label_maps), desc=f'Remapping {color_label_idx}th color label'):
    #         color_label_idx_in_pred = np.where((image == color_label).all(1))[0]
    #
    #         pred_label_maps[idx][color_label_idx_in_pred] = color_label_idx

    '''miou'''
    metric_ious = []
    val_cm = ConfusionMatrix(num_classes=num_semantic_classes)

    '''avg on imgs'''
    # for (pred_label_map, gt_label_map) in tqdm(zip(pred_label_maps, gt_label_maps)):
    #     metric_iou = val_cm.add_batch(pred_label_map, gt_label_map, return_miou=True)
    #     metric_ious.append(metric_iou)
    #     print(metric_iou)

    '''avg on pixels'''
    # All frames' pixels go through one confusion matrix -> one global mIoU.
    pred_label_maps_flatten, gt_label_maps_flatten = pred_label_maps.reshape(-1, 1), gt_label_maps.reshape(-1, 1)
    metric_iou = val_cm.add_batch(pred_label_maps_flatten, gt_label_maps_flatten, return_miou=True)
    metric_ious.append(metric_iou)

    miou = np.mean(np.array(metric_ious))
    print(f"sparse_ratio = {sparse_ratio * 100}%, miou = {miou}")
    miou_result_desc += f"sparse_ratio = {sparse_ratio * 100}%, miou = {miou}\n"

    with open(f'{pred_dir_path}/miou_{scene_id}_ablation.txt', 'w') as f:
        f.write(miou_result_desc)
-
-
def cal_ap_replica_tensorf(gt_dir_path, pred_dir_path, ins2label_path, scene_id, thre_list=[0.5, 0.75], mode='test'):
    """Compute mAP at IoU thresholds on a Replica scene for TensoRF renders.

    Two prediction layouts are supported, detected by the presence of
    ``049999_logit_*.npy`` files in the render directory:
      * logits present — per-frame softmax gives per-pixel confidences, and
        ``calculate_ap`` is called with per-class median confidences;
      * logits absent — color-coded PNGs are remapped to label indices via
        the instance palette and AP is computed without confidences.

    Side effects:
        Prints per-setting mean AP and writes
        ``<pred_dir_path>/map_<scene_id>_distill.txt``.

    NOTE(review): ``thre_list`` is a mutable default argument (never mutated
    here, but a tuple would be safer).
    """
    ins2label = json.load((open(ins2label_path, 'r')))['replica'][scene_id]
    num_sem_classes = len(ins2label)

    scene_id_simple = scene_id.replace('_', '')

    # Row k of ins_rgbs is the RGB color used to render semantic label k.
    ins_rgbs_path = os.path.join(gt_dir_path, scene_id, 'ins_rgb.hdf5')
    with h5py.File(ins_rgbs_path, 'r') as f:
        ins_rgbs = f['datasets'][:].astype(np.uint8)

    img_total_num = len(glob.glob(os.path.join(gt_dir_path, scene_id, 'semantic_instance', 'semantic_instance_*.png')))
    if mode == 'train':
        img_indices = list(range(0, img_total_num, 5))
    else:
        # Test split: every 5th frame starting at 5 // 2 == 2.
        img_indices = list(range(5 // 2, img_total_num, 5))

    map_result_desc = ''
    all_ap = []

    # for sem_interval in [1, 2, 5, 10, 20]:
    for sem_interval in [20]:
        sparse_ratio = 1 - 1 / sem_interval
        gt_sem_maps = []
        pred_sem_maps = []

        pred_logit_dir_path = os.path.join(pred_dir_path, scene_id,
                                           # f'{scene_id_simple}_sem_MLPFea_complexersemmat_pca64_interval_{sem_interval}_tinyvoxel',
                                           f'{scene_id_simple}_sem_MLPFea_complexersemmat_warmup_interval_{sem_interval}_tinyvoxel',
                                           'imgs_vis', 'sem')
        # pred_logit_path = os.path.join(pred_logit_dir_path, f'{scene_id}', 'ablate_SDF',
        #                                f'{scene_id_simple}_interval{sem_interval}_insPE',
        #                                'imgs_vis', 'sem')
        # Branch selector: any saved logit files -> confidence-weighted AP.
        pred_logit_paths = glob.glob(os.path.join(pred_logit_dir_path, '049999_logit_*.npy'))

        for i in tqdm(img_indices):
            # Frame i of the full sequence corresponds to render index i // 5.
            pred_idx = int(i // 5)
            # pred_logit_path = os.path.join(pred_dir_path, scene_id,
            #                                # f'{scene_id_simple}_sem_MLPFea_complexersemmat_pca64_interval_{sem_interval}_tinyvoxel',
            #                                f'{scene_id_simple}_sem_MLPFea_complexersemmat_warmup_interval_{sem_interval}_tinyvoxel',
            #                                'imgs_vis', 'sem', f'049999_{pred_idx:03d}.png')
            # pred_logit_path = os.path.join(pred_dir_path, f'{scene_id}', 'ablate_SDF',
            #                                f'{scene_id_simple}_interval{sem_interval}_insPE',
            #                                'imgs_vis', 'sem',
            #                                f'049999_logit_{pred_idx:03d}.npy')

            if len(pred_logit_paths) > 0:
                # --- logits branch: softmax over the class axis ---
                pred_logit = np.load(os.path.join(pred_logit_dir_path, f'049999_logit_{pred_idx:03d}.npy'))
                pred_logit = F.softmax(torch.from_numpy(pred_logit), dim=-1).numpy()

                pred_conf_mask = np.max(pred_logit, axis=-1)     # max class prob per pixel
                pred_label_map = np.argmax(pred_logit, axis=-1)  # predicted label per pixel
                unique_pred_labels = np.unique(pred_label_map)
                H, W = pred_label_map.shape
                pred_label_map = pred_label_map.reshape(-1, 1)

                gt_image_path = os.path.join(gt_dir_path, scene_id, 'semantic_instance', f'semantic_instance_{i}.png')
                gt_sem_map = np.array(Image.open(gt_image_path))
                gt_sem_map = gt_sem_map.reshape(-1, 1)
                gt_label_map = np.zeros_like(gt_sem_map, dtype=np.int64)
                unique_sem_map = np.unique(gt_sem_map)
                for sem_map in unique_sem_map:
                    # todo: add judgement
                    # NOTE(review): uses str(sem_map) on a numpy scalar here but
                    # str(sem_map.item()) elsewhere — same text for integer ids,
                    # but worth unifying.
                    gt_label_map[gt_sem_map == sem_map] = ins2label[str(sem_map)]
                # gt_label_map = gt_label_map.reshape(H, W)
                unique_gt_labels = np.unique(gt_label_map)
                num_valid_labels = len(unique_gt_labels)

                # Fresh confusion matrix per frame in this branch.
                val_cm = ConfusionMatrix(num_classes=num_sem_classes)
                val_cm.add_batch(pred_label_map, gt_label_map, return_miou=False)
                confusion_matrix = val_cm.confusion_matrix

                # Per-class IoU = diag / (row sum + col sum - diag).
                siou = np.divide(np.diag(confusion_matrix), (np.sum(confusion_matrix, axis=1) + np.sum(confusion_matrix, axis=0)
                                                             - np.diag(confusion_matrix) + 1e-6))

                # Keep only the classes actually present in this frame's GT.
                iou_metrics = siou[unique_gt_labels]

                '''confidence values'''
                # prepare confidence values
                unique_pred_labels, unique_gt_labels = torch.from_numpy(unique_pred_labels), torch.from_numpy(unique_gt_labels)
                pred_label_map = torch.from_numpy(pred_label_map).reshape(H, W)
                pred_conf_mask = torch.from_numpy(pred_conf_mask)
                conf_scores = torch.zeros_like(unique_gt_labels, dtype=torch.float32)
                # NOTE(review): this inner loop rebinds `i`, shadowing the frame
                # index of the enclosing loop (iteration is unaffected, but it
                # is confusing to read).
                for i, label in enumerate(unique_gt_labels):
                    if label.item() in unique_pred_labels:
                        index = torch.where(pred_label_map == label)
                        ssm = pred_conf_mask[index[0], index[1]]  # confidence value
                        pred_obj_conf = torch.median(ssm).item()  # median confidence value for one object
                        conf_scores[i] = pred_obj_conf

                iou_metrics = torch.from_numpy(iou_metrics).to(device)
                conf_scores = conf_scores.to(device)

                ap = calculate_ap(iou_metrics, num_valid_labels, thre_list, device, confidence=conf_scores, function_select='integral')
                all_ap.append(ap)
            else:
                # --- color-map branch: collect frames, remap below ---
                pred_image_path = os.path.join(pred_logit_dir_path, f'049999_{pred_idx:03d}.png')
                pred_color_sem_map = np.array(Image.open(pred_image_path))
                h, w, _ = pred_color_sem_map.shape
                pred_color_sem_map = pred_color_sem_map.reshape(h*w, 3)
                pred_sem_maps.append(pred_color_sem_map)
                # gt
                gt_image_path = os.path.join(gt_dir_path, scene_id, 'semantic_instance', f'semantic_instance_{i}.png')
                gt_sem_map = np.array(Image.open(gt_image_path))
                gt_sem_map = gt_sem_map.reshape(h*w, 1)
                gt_sem_maps.append(gt_sem_map)

        if len(pred_logit_paths) == 0:
            '''remap gt'''
            gt_sem_maps = np.stack(gt_sem_maps, 0)  # (num_imgs, h*w, 1)
            gt_label_maps = np.zeros_like(gt_sem_maps)  # (num_imgs, h*w, 1)

            unique_sem_maps = np.unique(gt_sem_maps)

            for sem_map in unique_sem_maps:
                # todo: add judgement
                gt_label_maps[gt_sem_maps == sem_map] = ins2label[str(sem_map.item())]

            '''remap prediction'''
            pred_sem_maps = np.stack(pred_sem_maps, 0)  # (num_imgs, h*w, 3)
            pred_label_maps = np.zeros_like(gt_label_maps)  # (num_imgs, h*w, 1)

            for color_label_idx, color_label in enumerate(ins_rgbs):
                for idx, image in tqdm(enumerate(pred_sem_maps), desc=f'Remapping {color_label_idx}th color label'):
                    color_label_idx_in_pred = np.where((image == color_label).all(1))[0]

                    pred_label_maps[idx][color_label_idx_in_pred] = color_label_idx

            # NOTE(review): val_cm accumulates across frames here, so each
            # frame's siou is computed from the running totals rather than a
            # per-frame matrix — confirm this is intended.
            val_cm = ConfusionMatrix(num_classes=num_sem_classes)
            for pred_label_map, gt_label_map in zip(pred_label_maps, gt_label_maps):
                unique_gt_labels = np.unique(gt_label_map)
                num_valid_labels = len(unique_gt_labels)

                val_cm.add_batch(pred_label_map, gt_label_map, return_miou=False)
                confusion_matrix = val_cm.confusion_matrix

                # Per-class IoU = diag / (row sum + col sum - diag).
                siou = np.divide(np.diag(confusion_matrix),
                                 (np.sum(confusion_matrix, axis=1) + np.sum(confusion_matrix, axis=0)
                                  - np.diag(confusion_matrix) + 1e-6))

                iou_metrics = siou[unique_gt_labels]
                iou_metrics = torch.from_numpy(iou_metrics).to(device)

                ap = calculate_ap(iou_metrics, num_valid_labels, thre_list, device, function_select='integral')
                all_ap.append(ap)

        all_ap_array = np.array(all_ap)
        mean_ap = np.mean(all_ap_array, axis=0)
        print(mean_ap)
        map_result_desc += f'sparse_ratio = {sparse_ratio * 100}%, '
        for i, thre in enumerate(thre_list):
            map_result_desc += f'map@{thre} = {mean_ap[i]}, '
            if i == len(thre_list) - 1:
                map_result_desc += '\n'

    # with open(f'{pred_dir_path}/map_{scene_id}.txt', 'w') as f:
    with open(f'{pred_dir_path}/map_{scene_id}_distill.txt', 'w') as f:
        f.write(map_result_desc)
-
-
def cal_miou_scannet_tensorf(gt_dir_path, pred_dir_path, scene_id, resize=True, mode='test'):
    """Compute semantic mIoU on a ScanNet scene for TensoRF label renders.

    Predictions are label-index PNGs (``099999_label_*.png``); GT label maps
    are read per split index and optionally resized (nearest neighbour) to
    the prediction resolution. The class count is read from the ``sem_dim``
    kwarg stored in the scene's raw-features checkpoint.

    Side effects:
        Prints per-sparsity mIoU and writes
        ``<pred_dir_path>/miou_<scene_id_simple>.txt``.
    """
    scene_id_simple = scene_id.replace('_', '')
    # Split file lists the GT frame ids belonging to this mode.
    img_indices = np.loadtxt(os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode+'_split.txt')).astype(np.int16)
    miou_result_desc = ''

    # One pass per training-time semantic supervision stride.
    for sem_inerval in [1, 2, 5, 10, 20]:
        # for sem_inerval in [1, 2, 5]:
        # for sem_inerval in [10, 20]:
        # for sem_inerval in [20]:
        sparse_ratio = 1 - 1 / sem_inerval
        gt_label_maps = []
        pred_label_maps = []

        for i in tqdm(range(len(img_indices))):
            # Predictions are numbered by position in the split, not frame id.
            pred_idx = i
            pred_image_path = os.path.join(pred_dir_path, scene_id,
                                           f'{scene_id_simple}_sem_MLPFea_complexersemmat_warmup_interval_{sem_inerval}_tinyvoxel',
                                           # f'{scene_id_simple}_sem_MLPFea_complexersemmat_warmup_interval_{sem_inerval}_tinyvoxel_raw40000_sem170000',
                                           'imgs_vis', 'sem',
                                           f'099999_label_{pred_idx:03d}.png')
            # f'079999_label_{pred_idx:03d}.png')
            # f'119999_label_{pred_idx:03d}.png')
            # f'199999_label_{pred_idx:03d}.png')
            pred_label_map = np.array(Image.open(pred_image_path))
            h, w = pred_label_map.shape
            pred_label_map = pred_label_map.reshape(h*w, 1)
            pred_label_maps.append(pred_label_map)

            # gt
            gt_idx = img_indices[i]
            gt_image_path = os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode, mode+'_ins_full', f'{gt_idx}.png')
            gt_label_map = np.array(Image.open(gt_image_path))
            if resize:
                # Nearest neighbour keeps label values intact while matching
                # the prediction resolution.
                gt_label_map = cv2.resize(gt_label_map, (w, h), interpolation=cv2.INTER_NEAREST)  # (h, w)
            gt_label_map = gt_label_map.reshape(h*w, 1)
            gt_label_maps.append(gt_label_map)

        gt_label_maps = np.stack(gt_label_maps, 0)
        pred_label_maps = np.stack(pred_label_maps, 0)
        # Number of semantic classes comes from the checkpoint's sem_dim kwarg.
        ckpt = torch.load(os.path.join(pred_dir_path, scene_id, 'rawfeas',
                                       f'{scene_id_simple}_rawfeas_tinyvoxel',
                                       f'{scene_id_simple}_rawfeas_tinyvoxel.pth'),
                          map_location='cpu')
        num_semantic_classes = ckpt['kwargs']['sem_dim']

        '''miou'''
        metric_ious = []
        val_cm = ConfusionMatrix(num_classes=num_semantic_classes)

        '''avg on imgs'''
        # for (pred_label_map, gt_label_map) in tqdm(zip(pred_label_maps, gt_label_maps)):
        #     metric_iou = val_cm.add_batch(pred_label_map, gt_label_map, return_miou=True)
        #     metric_ious.append(metric_iou)
        #     # print(metric_iou)

        '''avg on pixels'''
        # All frames' pixels go through one confusion matrix -> global mIoU.
        pred_label_maps_flatten, gt_label_maps_flatten = pred_label_maps.reshape(-1, 1), gt_label_maps.reshape(-1, 1)
        metric_iou = val_cm.add_batch(pred_label_maps_flatten, gt_label_maps_flatten, return_miou=True)
        metric_ious.append(metric_iou)

        miou = np.mean(np.array(metric_ious))
        print(f"sparse_ratio = {sparse_ratio * 100}%, miou = {miou}")
        miou_result_desc += f"sparse_ratio = {sparse_ratio * 100}%, miou = {miou}\n"

    with open(f'{pred_dir_path}/miou_{scene_id_simple}.txt', 'w') as f:
        # with open(f'{pred_dir_path}/miou_{scene_id_simple}_20_again.txt', 'w') as f:
        # with open(f'{pred_dir_path}/miou_{scene_id_simple}_10.txt', 'w') as f:
        # with open(f'{pred_dir_path}/miou_{scene_id_simple}_10_select.txt', 'w') as f:
        f.write(miou_result_desc)
-
-
def cal_miou_replica_sam(gt_dir_path, pred_dir_path, ins2label_path, scene_id, mode='test'):
    """Compute semantic mIoU on a Replica scene for SAM-derived masks.

    Predictions are color-coded instance PNGs (``<idx>_ins.png``). Pixels
    whose color matches no palette entry keep the sentinel -1 and are
    excluded from the confusion matrix ('ignore' strategy below; a
    'fill with gt' alternative is left commented out).

    Side effects:
        Prints the mIoU and writes
        ``<pred_dir_path>/miou_<scene_id_simple>.txt``.
    """
    ins2label = json.load((open(ins2label_path, 'r')))['replica'][scene_id]
    num_sem_classes = len(ins2label)

    scene_id_simple = scene_id.replace('_', '')

    # Row k of ins_rgbs is the RGB color used to render semantic label k.
    ins_rgbs_path = os.path.join(gt_dir_path, scene_id, 'ins_rgb.hdf5')
    with h5py.File(ins_rgbs_path, 'r') as f:
        ins_rgbs = f['datasets'][:].astype(np.uint8)

    img_total_num = len(glob.glob(os.path.join(gt_dir_path, scene_id, 'semantic_instance', 'semantic_instance_*.png')))
    if mode == 'train':
        img_indices = list(range(0, img_total_num, 5))
    else:
        # Test split: every 5th frame starting at 5 // 2 == 2.
        img_indices = list(range(5 // 2, img_total_num, 5))

    gt_sem_maps = []
    pred_sem_maps = []
    for i in tqdm(img_indices):
        # Frame i of the full sequence corresponds to render index i // 5.
        pred_idx = int(i // 5)
        pred_image_path = os.path.join(pred_dir_path,
                                       # f'{scene_id_simple}_vitb_grid32_min0_intersection',
                                       # f'{scene_id_simple}_vitl_grid32_min0_intersection',
                                       # f'{scene_id_simple}_vith_grid16_min0_intersection',
                                       f'{scene_id_simple}_vith_grid32_min0_intersection',
                                       # f'{scene_id_simple}_vith_grid32_min5000_intersection',
                                       # f'{scene_id_simple}_vith_grid64_min0_intersection',
                                       # f'{scene_id_simple}_vith_grid64_min50_intersection',
                                       # f'{scene_id_simple}_vith_grid64_min500_intersection',
                                       # f'{scene_id_simple}_vith_grid64_min5000_intersection',
                                       f'{pred_idx:03d}_ins.png')
        pred_color_sem_map = np.array(Image.open(pred_image_path))
        h, w, _ = pred_color_sem_map.shape
        pred_color_sem_map = pred_color_sem_map.reshape(h*w, 3)
        pred_sem_maps.append(pred_color_sem_map)
        # gt
        gt_image_path = os.path.join(gt_dir_path, scene_id, 'semantic_instance', f'semantic_instance_{i}.png')
        gt_sem_map = np.array(Image.open(gt_image_path))
        gt_sem_map = gt_sem_map.reshape(h*w, 1)
        gt_sem_maps.append(gt_sem_map)

    '''remap gt'''
    gt_sem_maps = np.stack(gt_sem_maps, 0)  # (num_imgs, h*w, 1)
    gt_label_maps = np.zeros_like(gt_sem_maps)  # (num_imgs, h*w, 1)

    unique_sem_maps = np.unique(gt_sem_maps)

    for sem_map in unique_sem_maps:
        # todo: add judgement
        gt_label_maps[gt_sem_maps == sem_map] = ins2label[str(sem_map.item())]

    '''remap prediction'''
    pred_sem_maps = np.stack(pred_sem_maps, 0)  # (num_imgs, h*w, 3)
    # -1 marks pixels whose color matches no palette entry (unmatched by SAM).
    pred_label_maps = np.full_like(gt_label_maps, -1)  # (num_imgs, h*w, 1)

    for color_label_idx, color_label in enumerate(ins_rgbs):
        for idx, image in tqdm(enumerate(pred_sem_maps), desc=f'Remapping {color_label_idx}th color label'):
            color_label_idx_in_pred = np.where((image == color_label).all(1))[0]

            pred_label_maps[idx][color_label_idx_in_pred] = color_label_idx

    '''miou'''
    metric_ious = []
    val_cm = ConfusionMatrix(num_classes=num_sem_classes)

    '''avg on imgs'''
    # for (pred_label_map, gt_label_map) in tqdm(zip(pred_label_maps, gt_label_maps)):
    #     '''fill with gt'''
    #     ignore_label_mask = (pred_label_map == -1)
    #     pred_label_map[ignore_label_mask] = gt_label_map[ignore_label_mask]
    #
    #     '''ignore'''
    #     # valid_label_mask = (pred_label_map != -1)
    #     # pred_label_map, gt_label_map = pred_label_map[valid_label_mask], gt_label_map[valid_label_mask]
    #
    #     metric_iou = val_cm.add_batch(pred_label_map, gt_label_map, return_miou=True)
    #     metric_ious.append(metric_iou)

    '''avg on pixels'''
    pred_label_maps_flatten, gt_label_maps_flatten = pred_label_maps.reshape(-1, 1), gt_label_maps.reshape(-1, 1)
    '''fill with gt'''
    # ignore_label_mask = (pred_label_maps_flatten == -1)
    # pred_label_maps_flatten[ignore_label_mask] = gt_label_maps_flatten[ignore_label_mask]

    '''ignore'''
    # Drop unmatched (-1) pixels from the evaluation entirely.
    valid_label_mask = (pred_label_maps_flatten != -1)
    pred_label_maps_flatten, gt_label_maps_flatten = pred_label_maps_flatten[valid_label_mask], gt_label_maps_flatten[valid_label_mask]

    metric_iou = val_cm.add_batch(pred_label_maps_flatten, gt_label_maps_flatten, return_miou=True)
    metric_ious.append(metric_iou)

    miou = np.mean(np.array(metric_ious))
    print(f"miou = {miou}")

    miou_result_desc = f"miou = {miou}\n"
    with open(f'{pred_dir_path}/miou_{scene_id_simple}.txt', 'w') as f:
        f.write(miou_result_desc)
-
-
def cal_miou_scannet_sam(gt_dir_path, pred_dir_path, scene_id, resize=True, mode='test'):
    """Compute semantic mIoU on a ScanNet scene for SAM-derived masks.

    Predictions are color-coded instance PNGs remapped to labels via the
    palette in ``ins_rgb.hdf5`` (stored as floats in [0, 1], hence the * 255).
    Unmatched pixels (-1) are filled with the GT label before scoring
    ('fill with gt' strategy; the 'ignore' alternative is commented out),
    and the mIoU is averaged over per-image IoUs.

    Side effects:
        Prints the mIoU and writes
        ``<pred_dir_path>/miou_<scene_id_simple>.txt``.
    """
    scene_id_simple = scene_id.replace('_', '')

    # Palette stored as floats in [0, 1]; scale to uint8 RGB.
    ins_rgbs_path = os.path.join(gt_dir_path, f'{scene_id_simple}_00', 'ins_rgb.hdf5')
    with h5py.File(ins_rgbs_path, 'r') as f:
        ins_rgbs = (f['datasets'][:] * 255).astype(np.uint8)

    img_indices = np.loadtxt(os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode+'_split.txt')).astype(np.int16)

    # NOTE(review): hardcoded absolute path ignores pred_dir_path for the
    # checkpoint lookup — presumably a machine-specific shortcut; the intended
    # form is commented out above it. Confirm before reuse.
    # ckpt = torch.load(os.path.join(pred_dir_path, scene_id,
    ckpt = torch.load(os.path.join('/data/dzy_data/nerf/ICANN2023/tensorf', scene_id,
                                   'rawfeas', f'{scene_id_simple}_rawfeas_tinyvoxel',
                                   f'{scene_id_simple}_rawfeas_tinyvoxel.pth'),
                      map_location='cpu')
    num_sem_classes = ckpt['kwargs']['sem_dim']

    gt_label_maps = []
    pred_sem_maps = []
    for i in tqdm(range(len(img_indices))):
        # Predictions are numbered by position in the split, not frame id.
        pred_idx = i
        pred_image_path = os.path.join(pred_dir_path,
                                       f'{scene_id_simple}_vith_grid32_min0_intersection',
                                       f'{pred_idx:03d}_ins.png')
        pred_color_sem_map = np.array(Image.open(pred_image_path))
        h, w, _ = pred_color_sem_map.shape
        pred_color_sem_map = pred_color_sem_map.reshape(h*w, 3)
        pred_sem_maps.append(pred_color_sem_map)
        # gt
        gt_idx = img_indices[i]
        gt_image_path = os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode, mode + '_ins_full', f'{gt_idx}.png')
        gt_label_map = np.array(Image.open(gt_image_path))
        if resize:
            # Nearest neighbour keeps label values intact.
            gt_label_map = cv2.resize(gt_label_map, (w, h), interpolation=cv2.INTER_NEAREST)  # (h, w)
        gt_label_map = gt_label_map.reshape(h * w, 1)
        gt_label_maps.append(gt_label_map)

    gt_label_maps = np.stack(gt_label_maps, 0)  # (num_imgs, h*w, 1)

    '''remap prediction'''
    pred_sem_maps = np.stack(pred_sem_maps, 0)  # (num_imgs, h*w, 3)
    # -1 marks pixels whose color matches no palette entry.
    pred_label_maps = np.full_like(gt_label_maps, -1, dtype=np.int16)  # (num_imgs, h*w, 1)

    for color_label_idx in range(num_sem_classes):
        color_label = ins_rgbs[color_label_idx]
        for idx, image in tqdm(enumerate(pred_sem_maps), desc=f'Remapping {color_label_idx}th color label'):
            color_label_idx_in_pred = np.where((image == color_label).all(1))[0]

            pred_label_maps[idx][color_label_idx_in_pred] = color_label_idx

    '''miou'''
    metric_ious = []
    val_cm = ConfusionMatrix(num_classes=num_sem_classes)

    '''avg on imgs'''
    for (pred_label_map, gt_label_map) in tqdm(zip(pred_label_maps, gt_label_maps)):
        '''fill with gt'''
        # Unmatched pixels are counted as correct by copying the GT label in.
        ignore_label_mask = (pred_label_map == -1)
        pred_label_map[ignore_label_mask] = gt_label_map[ignore_label_mask]

        '''ignore'''
        # valid_label_mask = (pred_label_map != -1)
        # pred_label_map, gt_label_map = pred_label_map[valid_label_mask], gt_label_map[valid_label_mask]

        metric_iou = val_cm.add_batch(pred_label_map, gt_label_map, return_miou=True)
        metric_ious.append(metric_iou)

    '''avg on pixels'''
    # pred_label_maps_flatten, gt_label_maps_flatten = pred_label_maps.reshape(-1, 1), gt_label_maps.reshape(-1, 1)
    # '''fill with gt'''
    # # ignore_label_mask = (pred_label_maps_flatten == -1)
    # # pred_label_maps_flatten[ignore_label_mask] = gt_label_maps_flatten[ignore_label_mask]
    #
    # '''ignore'''
    # valid_label_mask = (pred_label_maps_flatten != -1)
    # pred_label_maps_flatten, gt_label_maps_flatten = pred_label_maps_flatten[valid_label_mask], gt_label_maps_flatten[valid_label_mask]
    #
    # metric_iou = val_cm.add_batch(pred_label_maps_flatten, gt_label_maps_flatten, return_miou=True)
    # metric_ious.append(metric_iou)

    miou = np.mean(np.array(metric_ious))
    print(f"miou = {miou}")

    miou_result_desc = f"miou = {miou}\n"
    with open(f'{pred_dir_path}/miou_{scene_id_simple}.txt', 'w') as f:
        f.write(miou_result_desc)
-
-
def cal_miou_replica_semnerf(gt_dir_path, pred_dir_path, scene_id, mode='test'):
    """Compute semantic mIoU on a Replica scene for Semantic-NeRF renders.

    Unlike the TensoRF variants, predictions are already label-index PNGs
    (``label_<idx>.png``), so no color-palette remapping is needed. One
    sparsity setting is evaluated per ``sem_interval``.

    Args:
        gt_dir_path: GT root; must contain ``color_dict.json`` and
            ``<scene_id>/semantic_instance/semantic_instance_<i>.png``.
        pred_dir_path: prediction root with one folder per sparsity setting,
            ``<scene>P<ratio>/step_000000``.
        scene_id: Replica scene name, e.g. ``office_0``.
        mode: ``'train'`` evaluates frames 0, 5, 10, ...; otherwise frames
            2, 17, 32, ... (stride 15).

    Side effects:
        Prints results and writes ``<pred_dir_path>/miou_<scene_id>.txt``.
    """
    ins2label_path = os.path.join(gt_dir_path, 'color_dict.json')
    # Use a context manager so the JSON handle is closed (original leaked it).
    with open(ins2label_path, 'r') as f:
        ins2label = json.load(f)['replica'][scene_id]
    num_semantic_classes = len(ins2label)

    scene_id_simple = scene_id.replace('_', '')

    img_total_num = len(glob.glob(os.path.join(gt_dir_path, scene_id, 'semantic_instance', 'semantic_instance_*.png')))
    if mode == 'train':
        img_indices = list(range(0, img_total_num, 5))
    else:
        # NOTE(review): stride 15 here (the other eval fns use 5) — presumably
        # a deliberate 3x subsample for speed; confirm.
        img_indices = list(range(5 // 2, img_total_num, 15))

    miou_result_desc = ''

    for sem_interval in [1, 2, 5, 10, 20]:
        # Fraction of frames that carried semantic supervision at train time.
        ratio = 1. / sem_interval

        gt_sem_maps = []
        pred_label_maps = []

        for i in tqdm(img_indices):
            # Frame i of the full sequence corresponds to render index i // 5.
            pred_idx = int(i // 5)

            pred_label_path = os.path.join(pred_dir_path, f'{scene_id_simple}_P{(ratio):.2f}', 'step_000000',
                                           f'label_{pred_idx:03d}.png')
            pred_label_map = np.array(Image.open(pred_label_path))
            h, w = pred_label_map.shape
            pred_label_map = pred_label_map.reshape(h * w, 1)
            pred_label_maps.append(pred_label_map)
            # GT instance-id map, flattened to the same layout.
            gt_image_path = os.path.join(gt_dir_path, scene_id, 'semantic_instance', f'semantic_instance_{i}.png')
            gt_sem_map = np.array(Image.open(gt_image_path))
            gt_sem_map = gt_sem_map.reshape(h * w, 1)
            gt_sem_maps.append(gt_sem_map)

        pred_label_maps = np.stack(pred_label_maps, 0)  # (num_imgs, h*w, 1)

        # --- remap GT instance ids to contiguous label indices ---
        gt_sem_maps = np.stack(gt_sem_maps, 0)      # (num_imgs, h*w, 1)
        gt_label_maps = np.zeros_like(gt_sem_maps)  # (num_imgs, h*w, 1)

        for sem_map in np.unique(gt_sem_maps):
            gt_label_maps[gt_sem_maps == sem_map] = ins2label[str(sem_map.item())]

        # --- mIoU averaged over all pixels of all frames ---
        metric_ious = []
        val_cm = ConfusionMatrix(num_classes=num_semantic_classes)

        pred_label_maps_flatten, gt_label_maps_flatten = pred_label_maps.reshape(-1, 1), gt_label_maps.reshape(-1, 1)
        metric_iou = val_cm.add_batch(pred_label_maps_flatten, gt_label_maps_flatten, return_miou=True)
        metric_ious.append(metric_iou)

        miou = np.mean(np.array(metric_ious))
        print(f"sparse_ratio = {(1-ratio) * 100}%, miou = {miou}")
        miou_result_desc += f"sparse_ratio = {(1-ratio) * 100}%, miou = {miou}\n"

    with open(f'{pred_dir_path}/miou_{scene_id}.txt', 'w') as f:
        f.write(miou_result_desc)
-
-
def cal_ap_replica_semnerf(gt_dir_path, pred_dir_path, ins2label_path, scene_id, thre_list=[0.5, 0.75], mode='test'):
    """Compute mAP at IoU thresholds on a Replica scene for Semantic-NeRF renders.

    Predictions are label-index PNGs (``label_<idx>.png``). Per frame, a
    confusion matrix gives per-class IoUs over the classes present in the GT,
    and ``calculate_ap`` converts them to AP values at each threshold in
    ``thre_list``; frame APs are averaged per sparsity setting.

    Args:
        gt_dir_path: GT root with ``<scene_id>/ins_rgb.hdf5`` and
            ``<scene_id>/semantic_instance/semantic_instance_<i>.png``.
        pred_dir_path: prediction root with ``<scene>P<ratio>/step_000000``.
        ins2label_path: JSON with ``['replica'][scene_id]`` id -> label index.
        scene_id: Replica scene name.
        thre_list: IoU thresholds for AP (not mutated; list default kept for
            interface compatibility).
        mode: 'train' or 'test' frame split; further subsampled 3x.

    Side effects:
        Prints per-setting mean AP and writes
        ``<pred_dir_path>/map_<scene_id>.txt``.
    """
    # Use a context manager so the JSON handle is closed (original leaked it).
    with open(ins2label_path, 'r') as f:
        ins2label = json.load(f)['replica'][scene_id]
    num_sem_classes = len(ins2label)

    scene_id_simple = scene_id.replace('_', '')

    # Palette is loaded for parity with the color-map variants; it is not used
    # here because predictions are already label indices.
    ins_rgbs_path = os.path.join(gt_dir_path, scene_id, 'ins_rgb.hdf5')
    with h5py.File(ins_rgbs_path, 'r') as f:
        ins_rgbs = f['datasets'][:].astype(np.uint8)

    img_total_num = len(glob.glob(os.path.join(gt_dir_path, scene_id, 'semantic_instance', 'semantic_instance_*.png')))
    if mode == 'train':
        img_indices = list(range(0, img_total_num, 5))
    else:
        img_indices = list(range(5 // 2, img_total_num, 5))
    img_indices = img_indices[::3]  # extra 3x subsample for speed

    map_result_desc = ''

    for sem_interval in [1, 2, 5, 10, 20]:
        ratio = 1. / sem_interval
        sparse_ratio = 1 - 1. / sem_interval
        gt_sem_maps = []
        pred_label_maps = []
        # Bug fix: reset per sparsity setting. The original accumulated APs
        # across settings, so later rows averaged in earlier settings' APs.
        all_ap = []

        pred_label_dir_path = os.path.join(pred_dir_path, f'{scene_id_simple}_P{(ratio):.2f}', 'step_000000')

        for i in tqdm(img_indices):
            # Frame i of the full sequence corresponds to render index i // 5.
            pred_idx = int(i // 5)

            pred_label_path = os.path.join(pred_label_dir_path, f'label_{pred_idx:03d}.png')
            pred_label_map = np.array(Image.open(pred_label_path))
            h, w = pred_label_map.shape
            pred_label_map = pred_label_map.reshape(h * w, 1)
            pred_label_maps.append(pred_label_map)
            # GT instance-id map.
            gt_image_path = os.path.join(gt_dir_path, scene_id, 'semantic_instance', f'semantic_instance_{i}.png')
            gt_sem_map = np.array(Image.open(gt_image_path))
            gt_sem_map = gt_sem_map.reshape(h * w, 1)
            gt_sem_maps.append(gt_sem_map)

        # --- remap GT instance ids to contiguous label indices ---
        gt_sem_maps = np.stack(gt_sem_maps, 0)      # (num_imgs, h*w, 1)
        gt_label_maps = np.zeros_like(gt_sem_maps)  # (num_imgs, h*w, 1)

        for sem_map in np.unique(gt_sem_maps):
            gt_label_maps[gt_sem_maps == sem_map] = ins2label[str(sem_map.item())]

        # NOTE(review): val_cm accumulates across frames, so each frame's siou
        # reflects the running totals — kept as in the original; confirm.
        val_cm = ConfusionMatrix(num_classes=num_sem_classes)
        # Loop variable renamed from the misleading `pred_label_path` of the
        # original: it holds a flattened label map, not a path.
        for pred_label_map, gt_label_map in zip(pred_label_maps, gt_label_maps):
            unique_gt_labels = np.unique(gt_label_map)
            num_valid_labels = len(unique_gt_labels)

            val_cm.add_batch(pred_label_map, gt_label_map, return_miou=False)
            confusion_matrix = val_cm.confusion_matrix

            # Per-class IoU = diag / (row sum + col sum - diag).
            siou = np.divide(np.diag(confusion_matrix),
                             (np.sum(confusion_matrix, axis=1) + np.sum(confusion_matrix, axis=0)
                              - np.diag(confusion_matrix) + 1e-6))

            # Keep only the classes present in this frame's GT.
            iou_metrics = siou[unique_gt_labels]
            iou_metrics = torch.from_numpy(iou_metrics).to(device)

            ap = calculate_ap(iou_metrics, num_valid_labels, thre_list, device, function_select='integral')
            all_ap.append(ap)

        mean_ap = np.mean(np.array(all_ap), axis=0)
        print(mean_ap)
        map_result_desc += f'sparse_ratio = {sparse_ratio * 100}%, '
        for i, thre in enumerate(thre_list):
            map_result_desc += f'map@{thre} = {mean_ap[i]}, '
            if i == len(thre_list) - 1:
                map_result_desc += '\n'

    with open(f'{pred_dir_path}/map_{scene_id}.txt', 'w') as f:
        f.write(map_result_desc)
-
-
def cal_miou_scannet_semnerf(gt_dir_path, pred_dir_path, scene_id, resize=True, mode='test'):
    """Compute semantic mIoU on a ScanNet scene for Semantic-NeRF renders.

    Predictions are label-index PNGs under ``<scene_id>P<ratio>/step_000000``,
    one sparsity setting per ``sem_interval``. GT label maps are optionally
    resized (nearest neighbour) to the prediction resolution. The class count
    is read from the ``sem_dim`` kwarg in the raw-features checkpoint.

    Side effects:
        Prints per-sparsity mIoU and writes
        ``<pred_dir_path>/miou_<scene_id_simple>.txt``.
    """
    scene_id_simple = scene_id.replace('_', '')
    # Split file lists the GT frame ids belonging to this mode.
    img_indices = np.loadtxt(os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode + '_split.txt')).astype(np.int16)
    miou_result_desc = ''

    for sem_interval in [1, 2, 5, 10, 20]:
        sparse_ratio = 1 - 1 / sem_interval
        gt_label_maps = []
        pred_label_maps = []

        for i in tqdm(range(len(img_indices))):
            # Predictions are numbered by position in the split, not frame id.
            pred_idx = i
            pred_image_path = os.path.join(pred_dir_path, f'{scene_id}_P{(1-sparse_ratio):.2f}', 'step_000000',
                                           f'label_{pred_idx:03d}.png')
            pred_label_map = np.array(Image.open(pred_image_path))
            h, w = pred_label_map.shape
            pred_label_map = pred_label_map.reshape(h*w, 1)
            pred_label_maps.append(pred_label_map)

            # gt
            gt_idx = img_indices[i]
            gt_image_path = os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode, mode+'_ins_full', f'{gt_idx}.png')
            gt_label_map = np.array(Image.open(gt_image_path))
            if resize:
                # Nearest neighbour keeps label values intact.
                gt_label_map = cv2.resize(gt_label_map, (w, h), interpolation=cv2.INTER_NEAREST)  # (h, w)
            gt_label_map = gt_label_map.reshape(h*w, 1)
            gt_label_maps.append(gt_label_map)

        gt_label_maps = np.stack(gt_label_maps, 0)
        pred_label_maps = np.stack(pred_label_maps, 0)
        # Number of semantic classes comes from the checkpoint's sem_dim kwarg.
        ckpt = torch.load(os.path.join(pred_dir_path,
                                       f'{scene_id_simple}_rawfeas_tinyvoxel',
                                       f'{scene_id_simple}_rawfeas_tinyvoxel.pth'),
                          map_location='cpu')
        num_semantic_classes = ckpt['kwargs']['sem_dim']

        '''miou'''
        metric_ious = []
        val_cm = ConfusionMatrix(num_classes=num_semantic_classes)

        '''avg on imgs'''
        # for (pred_label_map, gt_label_map) in tqdm(zip(pred_label_maps, gt_label_maps)):
        #     metric_iou = val_cm.add_batch(pred_label_map, gt_label_map, return_miou=True)
        #     metric_ious.append(metric_iou)
        #     # print(metric_iou)

        '''avg on pixels'''
        # All frames' pixels go through one confusion matrix -> global mIoU.
        pred_label_maps_flatten, gt_label_maps_flatten = pred_label_maps.reshape(-1, 1), gt_label_maps.reshape(-1, 1)
        metric_iou = val_cm.add_batch(pred_label_maps_flatten, gt_label_maps_flatten, return_miou=True)
        metric_ious.append(metric_iou)

        miou = np.mean(np.array(metric_ious))
        print(f"sparse_ratio = {sparse_ratio * 100}%, miou = {miou}")
        miou_result_desc += f"sparse_ratio = {sparse_ratio * 100}%, miou = {miou}\n"

    with open(f'{pred_dir_path}/miou_{scene_id_simple}.txt', 'w') as f:
        f.write(miou_result_desc)
-
-
def cal_ap_scannet_semnerf(gt_dir_path, pred_dir_path, ins2label_path, scene_id, thre_list=(0.5, 0.75), resize=True, mode='test'):
    """Compute mean average precision (mAP) for Semantic-NeRF on a ScanNet scene.

    For each label-sparsity setting, accumulates a confusion matrix over the
    split's images; for every image the per-class IoUs of the labels present
    in its GT map are converted into an AP via ``calculate_ap``, and the APs
    are averaged over images.

    Args:
        gt_dir_path: root directory of ground-truth ScanNet data.
        pred_dir_path: root directory holding predicted label maps and the
            scene checkpoint (read only for the class count).
        ins2label_path: accepted for interface parity with the replica
            variants; not used by this function.
        scene_id: scene identifier such as 'scene_0010'.
        thre_list: IoU thresholds reported in the summary. Immutable tuple
            default avoids the shared-mutable-default pitfall.
        resize: if True, GT maps are resized (nearest neighbour) to the
            prediction resolution.
        mode: dataset split to evaluate ('test' or 'train').

    Side effects:
        Writes the per-sparsity-ratio mAP summary to
        ``{pred_dir_path}/map_{scene_id}.txt``.
    """
    scene_id_simple = scene_id.replace('_', '')
    ckpt = torch.load(os.path.join(pred_dir_path,
                                   f'{scene_id_simple}_rawfeas_tinyvoxel',
                                   f'{scene_id_simple}_rawfeas_tinyvoxel.pth'),
                      map_location=device)
    num_sem_classes = ckpt['kwargs']['sem_dim']

    img_indices = np.loadtxt(os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode + '_split.txt')).astype(np.int16)
    map_result_desc = ''

    for sem_interval in [1, 2, 5, 10, 20]:
        sparse_ratio = 1 - 1. / sem_interval
        gt_label_maps = []
        pred_label_maps = []
        # BUGFIX: reset per sparsity setting. Previously `all_ap` was created
        # once before this loop, so each interval's reported mAP silently
        # mixed in the APs of all earlier intervals.
        all_ap = []

        pred_label_dir_path = os.path.join(pred_dir_path, f'{scene_id}_P{(1-sparse_ratio):.2f}', 'step_000000')

        for i in tqdm(range(len(img_indices))):
            # prediction
            pred_label_path = os.path.join(pred_label_dir_path, f'label_{i:03d}.png')
            pred_label_map = np.array(Image.open(pred_label_path))
            h, w = pred_label_map.shape
            pred_label_maps.append(pred_label_map.reshape(h * w, 1))

            # gt
            gt_idx = img_indices[i]
            gt_image_path = os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode, mode + '_ins_full', f'{gt_idx}.png')
            gt_label_map = np.array(Image.open(gt_image_path))
            if resize:
                gt_label_map = cv2.resize(gt_label_map, (w, h), interpolation=cv2.INTER_NEAREST)  # (h, w)
            gt_label_maps.append(gt_label_map.reshape(h * w, 1))

        gt_label_maps = np.stack(gt_label_maps, 0)
        pred_label_maps = np.stack(pred_label_maps, 0)

        val_cm = ConfusionMatrix(num_classes=num_sem_classes)
        for pred_label_map, gt_label_map in zip(pred_label_maps, gt_label_maps):
            unique_gt_labels = np.unique(gt_label_map)
            num_valid_labels = len(unique_gt_labels)

            # NOTE(review): the confusion matrix keeps accumulating across
            # images, so each image's IoUs are computed on the running totals —
            # assumed intentional, matching the replica variant above.
            val_cm.add_batch(pred_label_map, gt_label_map, return_miou=False)
            confusion_matrix = val_cm.confusion_matrix

            # Per-class IoU: diag / (row_sum + col_sum - diag), eps-stabilized.
            siou = np.divide(np.diag(confusion_matrix),
                             (np.sum(confusion_matrix, axis=1) + np.sum(confusion_matrix, axis=0)
                              - np.diag(confusion_matrix) + 1e-6))

            # Score only the classes actually present in this GT image.
            iou_metrics = torch.from_numpy(siou[unique_gt_labels]).to(device)

            ap = calculate_ap(iou_metrics, num_valid_labels, thre_list, device, function_select='integral')
            all_ap.append(ap)

        mean_ap = np.mean(np.array(all_ap), axis=0)
        print(mean_ap)
        map_result_desc += f'sparse_ratio = {sparse_ratio * 100}%, '
        for i, thre in enumerate(thre_list):
            map_result_desc += f'map@{thre} = {mean_ap[i]}, '
        map_result_desc += '\n'

    with open(f'{pred_dir_path}/map_{scene_id}.txt', 'w') as f:
        f.write(map_result_desc)
-
-
def cal_miou_replica_dmnerf(gt_dir_path, pred_dir_path, ins2label_path, scene_id, mode='test'):
    """Compute mIoU for DM-NeRF predictions on a Replica scene.

    Predictions are colour-coded instance maps; they are decoded to label ids
    via the scene's instance-colour palette (``ins_rgb.hdf5``) before being
    scored against the remapped GT instance maps.

    Args:
        gt_dir_path: root of the Replica GT data (instance PNGs + palette).
        pred_dir_path: root of the DM-NeRF rendered instance images.
        ins2label_path: JSON mapping raw instance ids to contiguous labels.
        scene_id: scene identifier such as 'office_0'.
        mode: 'train' keeps every 5th frame starting at 0; otherwise every
            5th frame starting at frame 2.

    Side effects:
        Writes the per-sparsity-ratio mIoU summary to
        ``{pred_dir_path}/miou_{scene_id_simple}.txt``.
    """
    # Use a context manager so the JSON file handle is closed (the original
    # `json.load(open(...))` leaked it).
    with open(ins2label_path, 'r') as f:
        ins2label = json.load(f)['replica'][scene_id]
    num_semantic_classes = len(ins2label)

    scene_id_simple = scene_id.replace('_', '')

    # Per-instance RGB palette used to decode the colour-coded predictions.
    ins_rgbs_path = os.path.join(gt_dir_path, scene_id, 'ins_rgb.hdf5')
    with h5py.File(ins_rgbs_path, 'r') as f:
        ins_rgbs = f['datasets'][:].astype(np.uint8)

    img_total_num = len(glob.glob(os.path.join(gt_dir_path, scene_id, 'semantic_instance', 'semantic_instance_*.png')))
    if mode == 'train':
        img_indices = list(range(0, img_total_num, 5))
    else:
        img_indices = list(range(5 // 2, img_total_num, 5))

    miou_result_desc = ''

    for sem_interval in [1, 2, 5, 10, 20]:
        sparse_ratio = 1 - 1 / sem_interval

        gt_sem_maps = []
        pred_sem_maps = []

        for i in tqdm(img_indices):
            # prediction (colour-coded instance image)
            pred_idx = int(i // 5)
            pred_image_path = os.path.join(pred_dir_path, scene_id_simple, f'sparse{sem_interval}', 'testset_200000',
                                           f'instance_{pred_idx:03d}.png')
            pred_color_sem_map = np.array(Image.open(pred_image_path))
            h, w, _ = pred_color_sem_map.shape
            pred_sem_maps.append(pred_color_sem_map.reshape(h * w, 3))

            # gt
            gt_image_path = os.path.join(gt_dir_path, scene_id, 'semantic_instance', f'semantic_instance_{i}.png')
            gt_sem_map = np.array(Image.open(gt_image_path))
            gt_sem_maps.append(gt_sem_map.reshape(h * w, 1))

        '''remap gt instance ids -> contiguous labels'''
        gt_sem_maps = np.stack(gt_sem_maps, 0)  # (num_imgs, h*w, 1)
        gt_label_maps = np.zeros_like(gt_sem_maps)  # (num_imgs, h*w, 1)

        for sem_map in np.unique(gt_sem_maps):
            # todo: add judgement (ids missing from ins2label raise KeyError)
            gt_label_maps[gt_sem_maps == sem_map] = ins2label[str(sem_map.item())]

        '''remap colour-coded prediction -> labels; -1 marks unmatched pixels'''
        pred_sem_maps = np.stack(pred_sem_maps, 0)  # (num_imgs, h*w, 3)
        # BUGFIX: use an explicit signed dtype for the -1 sentinel.
        # np.full_like inherited the GT PNG dtype; for an unsigned dtype the
        # -1 wraps around, so the "== -1" / "!= -1" masks below never match.
        pred_label_maps = np.full(gt_label_maps.shape, -1, dtype=np.int64)  # (num_imgs, h*w, 1)

        for color_label_idx, color_label in enumerate(ins_rgbs):
            for idx, image in tqdm(enumerate(pred_sem_maps), desc=f'Remapping {color_label_idx}th color label'):
                color_label_idx_in_pred = np.where((image == color_label).all(1))[0]
                pred_label_maps[idx][color_label_idx_in_pred] = color_label_idx

        '''miou'''
        metric_ious = []
        val_cm = ConfusionMatrix(num_classes=num_semantic_classes)

        # todo: remove tricks — the two sparsity regimes are scored
        # differently: dense settings fill unmatched prediction pixels with GT
        # (per image), sparse settings drop unmatched pixels entirely (over
        # all pixels at once).
        if sem_interval in (1, 2, 5):
            for pred_label_map, gt_label_map in tqdm(zip(pred_label_maps, gt_label_maps)):
                # fill unmatched pixels with gt
                ignore_label_mask = (pred_label_map == -1)
                pred_label_map[ignore_label_mask] = gt_label_map[ignore_label_mask]

                metric_ious.append(val_cm.add_batch(pred_label_map, gt_label_map, return_miou=True))
        else:
            pred_label_maps_flatten = pred_label_maps.reshape(-1, 1)
            gt_label_maps_flatten = gt_label_maps.reshape(-1, 1)
            # drop unmatched pixels
            label_mask = (pred_label_maps_flatten != -1)
            pred_label_maps_flatten = pred_label_maps_flatten[label_mask]
            gt_label_maps_flatten = gt_label_maps_flatten[label_mask]

            metric_ious.append(val_cm.add_batch(pred_label_maps_flatten, gt_label_maps_flatten, return_miou=True))

        miou = np.mean(np.array(metric_ious))
        print(f"sparse_ratio = {sparse_ratio * 100}%, miou = {miou}")
        miou_result_desc += f"sparse_ratio = {sparse_ratio * 100}%, miou = {miou}\n"

    with open(f'{pred_dir_path}/miou_{scene_id_simple}.txt', 'w') as f:
        f.write(miou_result_desc)
-
-
def cal_miou_scannet_dmnerf(gt_dir_path, pred_dir_path, scene_id, resize=True, mode='test',
                            ckpt_dir='/data/dzy_data/nerf/ICANN2023/sem_nerf/scannet'):
    """Compute mIoU for DM-NeRF predictions on a ScanNet scene.

    Predictions are colour-coded instance maps; they are decoded to label ids
    via the scene's instance-colour palette before being scored against the
    GT instance maps. Unmatched prediction pixels are filled with the GT
    value ("metrics_1" scoring).

    Args:
        gt_dir_path: root directory of ground-truth ScanNet data.
        pred_dir_path: root directory of the DM-NeRF rendered instance images.
        scene_id: scene identifier such as 'scene_0010'.
        resize: if True, GT maps are resized (nearest neighbour) to the
            prediction resolution.
        mode: dataset split to evaluate ('test' or 'train').
        ckpt_dir: directory holding the ``{scene}_rawfeas_tinyvoxel``
            checkpoint, read only for the semantic class count. Parameterized
            so the previously hard-coded absolute path can be overridden; the
            default preserves the old behaviour.

    Side effects:
        Writes the summary to
        ``{pred_dir_path}/scannet/miou_{scene_id_simple}_metrics1.txt``.
    """
    scene_id_simple = scene_id.replace('_', '')
    ckpt = torch.load(os.path.join(ckpt_dir,
                                   f'{scene_id_simple}_rawfeas_tinyvoxel',
                                   f'{scene_id_simple}_rawfeas_tinyvoxel.pth'),
                      map_location='cpu')
    num_semantic_classes = ckpt['kwargs']['sem_dim']

    img_indices = np.loadtxt(os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode + '_split.txt')).astype(np.int16)

    # Per-instance RGB palette used to decode the colour-coded predictions.
    ins_rgbs_path = os.path.join(gt_dir_path, f'{scene_id_simple}_00', 'ins_rgb.hdf5')
    with h5py.File(ins_rgbs_path, 'r') as f:
        ins_rgbs = f['datasets'][:].astype(np.uint8)

    miou_result_desc = ''

    for sem_interval in [1, 2, 5, 10, 20]:
        sparse_ratio = 1 - 1 / sem_interval
        gt_label_maps = []
        pred_sem_maps = []

        for i in tqdm(range(len(img_indices))):
            # prediction (colour-coded instance image)
            pred_image_path = os.path.join(pred_dir_path, 'scannet', f'{scene_id_simple}_00',
                                           f'sparse{sem_interval:02d}',
                                           'testset_300000',  # 10,38
                                           # 'render_test_300000',  # 12,24,33,88,113,192
                                           f'instance_{i:03d}.png')
            pred_color_sem_map = np.array(Image.open(pred_image_path))
            h, w, _ = pred_color_sem_map.shape
            pred_sem_maps.append(pred_color_sem_map.reshape(h * w, 3))

            # gt
            gt_idx = img_indices[i]
            gt_image_path = os.path.join(gt_dir_path, f'{scene_id_simple}_00', mode, mode + '_ins_full', f'{gt_idx}.png')
            gt_label_map = np.array(Image.open(gt_image_path))
            if resize:
                gt_label_map = cv2.resize(gt_label_map, (w, h), interpolation=cv2.INTER_NEAREST)  # (h, w)
            gt_label_maps.append(gt_label_map.reshape(h * w, 1))

        gt_label_maps = np.stack(gt_label_maps, 0)

        '''remap colour-coded prediction -> labels; -1 marks unmatched pixels'''
        pred_sem_maps = np.stack(pred_sem_maps, 0)  # (num_imgs, h*w, 3)
        # np.full's default integer dtype is signed, so the -1 sentinel is safe.
        pred_label_maps = np.full(gt_label_maps.shape, -1)  # (num_imgs, h*w, 1)

        for color_label_idx, color_label in enumerate(ins_rgbs):
            for idx, image in tqdm(enumerate(pred_sem_maps), desc=f'Remapping {color_label_idx}th color label'):
                color_label_idx_in_pred = np.where((image == color_label).all(1))[0]
                pred_label_maps[idx][color_label_idx_in_pred] = color_label_idx

        '''miou, avg on imgs (metrics_1): unmatched pixels filled with GT'''
        metric_ious = []
        val_cm = ConfusionMatrix(num_classes=num_semantic_classes)

        for pred_label_map, gt_label_map in tqdm(zip(pred_label_maps, gt_label_maps)):
            # fill unmatched pixels with gt
            ignore_label_mask = (pred_label_map == -1)
            pred_label_map[ignore_label_mask] = gt_label_map[ignore_label_mask]

            metric_ious.append(val_cm.add_batch(pred_label_map, gt_label_map, return_miou=True))

        miou = np.mean(np.array(metric_ious))
        print(f"sparse_ratio = {sparse_ratio * 100}%, miou = {miou}")
        miou_result_desc += f"sparse_ratio = {sparse_ratio * 100}%, miou = {miou}\n"

    with open(f'{pred_dir_path}/scannet/miou_{scene_id_simple}_metrics1.txt', 'w') as f:
        f.write(miou_result_desc)
-
-
if __name__ == '__main__':
    parser = configargparse.ArgumentParser()
    parser.add_argument('--model', type=str, choices=['tensorf', 'semnerf', 'dmnerf', 'sam'])
    parser.add_argument('--dataset', type=str, choices=['replica', 'scannet'])
    parser.add_argument('--gt_dir_path', type=str)
    parser.add_argument('--pred_dir_path', type=str)
    parser.add_argument('--ins2label_path', type=str, default='')
    parser.add_argument('--scene_id', type=str, choices=['office_0', 'office_2', 'office_3', 'office_4',
                                                         'room_0', 'room_1', 'room_2',
                                                         'scene_0010', 'scene_0012', 'scene_0024', 'scene_0033',
                                                         'scene_0038', 'scene_0088', 'scene_0113', 'scene_0192',
                                                         'all'])

    args = parser.parse_args()

    # (model, dataset) -> evaluator thunk. Lambdas keep the function-name
    # lookup lazy, so an undefined evaluator only errors when its combination
    # is actually selected — same timing as the original if/elif chain.
    evaluators = {
        ('tensorf', 'replica'): lambda: cal_ap_replica_tensorf(args.gt_dir_path, args.pred_dir_path, args.ins2label_path, args.scene_id),
        ('tensorf', 'scannet'): lambda: cal_miou_scannet_tensorf(args.gt_dir_path, args.pred_dir_path, args.scene_id),
        ('semnerf', 'replica'): lambda: cal_ap_replica_semnerf(args.gt_dir_path, args.pred_dir_path, args.ins2label_path, args.scene_id),
        ('semnerf', 'scannet'): lambda: cal_ap_scannet_semnerf(args.gt_dir_path, args.pred_dir_path, args.ins2label_path, args.scene_id),
        ('dmnerf', 'replica'): lambda: cal_miou_replica_dmnerf(args.gt_dir_path, args.pred_dir_path, args.ins2label_path, args.scene_id),
        ('dmnerf', 'scannet'): lambda: cal_miou_scannet_dmnerf(args.gt_dir_path, args.pred_dir_path, args.scene_id),
        ('sam', 'replica'): lambda: cal_miou_replica_sam(args.gt_dir_path, args.pred_dir_path, args.ins2label_path, args.scene_id),
        ('sam', 'scannet'): lambda: cal_miou_scannet_sam(args.gt_dir_path, args.pred_dir_path, args.scene_id),
    }

    run = evaluators.get((args.model, args.dataset))
    if run is not None:
        run()