|
- import os
-
- import cv2
- import mindspore as ms
- import numpy as np
- from mindspore import nn, ops, Tensor, value_and_grad
- from mindspore.amp import init_status, all_finite
-
- from common.dataset.transform import get_size_with_aspect_ratio
- from common.detr.matcher.matcher import HungarianMatcher
- from common.utils.box_ops import box_xyxy_to_cxcywh
- from common.utils.preprocessing import pad_as_batch
- from common.utils.system import is_windows
- from model_zoo.dino.build_model import build_dino
-
-
def get_input():
    """Assemble a tiny fixed batch of detection samples for runtime testing.

    Each sample is a dict with an 'image' CHW Tensor (BGR, from OpenCV) and an
    'instances' dict carrying image_size, gt_classes and gt_boxes
    (xyxy, absolute pixels).

    Returns:
        tuple: (inputs_list, image_root) — the sample list and the directory
        the demo images were read from.
    """
    # NOTE(review): `is_windows` is used as a value here; if it is actually a
    # function, this condition is always truthy — confirm it is a bool.
    image_root = r"C:\02Data\demo\image" if is_windows else '/data1/zhouwuxing/demo/'
    image_path1 = os.path.join(image_root, 'hrnet_demo.jpg')
    image_path2 = os.path.join(image_root, 'road554.png')
    image_path3 = os.path.join(image_root, 'orange_71.jpg')

    def _load_chw(path):
        # OpenCV returns HWC uint8; convert to a CHW Tensor.
        return Tensor.from_numpy(cv2.imread(path)).transpose(2, 0, 1)

    sample1 = dict(
        image=_load_chw(image_path1),
        instances=dict(image_size=(423, 359),
                       gt_classes=Tensor([3, 7], ms.int32),
                       gt_boxes=Tensor([[100, 200, 210, 300], [50, 100, 90, 150]])),
    )
    sample2 = dict(
        image=_load_chw(image_path2),
        instances=dict(image_size=(400, 300),
                       gt_classes=Tensor([21, 45, 9], ms.int32),
                       gt_boxes=Tensor([[80, 220, 150, 320],
                                        [180, 100, 300, 200],
                                        [150, 150, 180, 180]])),
    )
    # A third sample (image_path3, size 1249x1400) was disabled in the
    # original and is intentionally not included here.
    return [sample1, sample2], image_root
-
-
class Resize(object):
    """Resize a CHW image (and its xyxy boxes) preserving aspect ratio."""

    def __init__(self, size=800, max_size=960):
        # size: target length of the shorter side; None disables resizing.
        # max_size: cap on the longer side after resizing.
        self.size = size
        self.max_size = max_size

    def __call__(self, img: Tensor, boxes: Tensor):
        """Return (resized CHW Tensor, boxes scaled to the new resolution)."""
        if self.size is None:
            print('no resize')
            return img, boxes
        img = img.asnumpy().transpose(1, 2, 0)  # CHW -> HWC for OpenCV
        h, w, _ = img.shape

        nh, nw = get_size_with_aspect_ratio(img.shape, self.size, self.max_size)
        # BUGFIX: the interpolation flag must be passed by keyword — the third
        # positional parameter of cv2.resize is `dst`, not `interpolation`.
        resize_pad_img = cv2.resize(img, (nw, nh), interpolation=cv2.INTER_CUBIC)

        # Scale boxes (x1, y1, x2, y2) by the per-axis resize ratios.
        ratio_width, ratio_height = float(nw) / float(w), float(nh) / float(h)
        boxes = boxes * Tensor([ratio_width, ratio_height, ratio_width, ratio_height])

        resize_pad_img = Tensor(resize_pad_img).transpose(2, 0, 1)  # HWC -> CHW
        return resize_pad_img, boxes
-
-
class Pad(object):
    """Zero-pad a CHW image to a fixed (tgt_h, tgt_w) canvas, with a mask."""

    def __init__(self, tgt_h, tgt_w):
        # Target canvas height/width; None for either disables padding.
        self.tgt_h = tgt_h
        self.tgt_w = tgt_w

    def __call__(self, img):
        """Return (padded image, mask); mask is 0 on valid pixels, 1 on padding."""
        c, h, w = img.shape
        if self.tgt_h is None or self.tgt_w is None:
            print(f'no pad')
            return img, ops.zeros((h, w), ms.float32)
        padded = ops.zeros((c, self.tgt_h, self.tgt_w), ms.float32)
        mask = ops.ones((self.tgt_h, self.tgt_w), ms.float32)
        # Copy the image into the top-left corner and mark that region valid.
        padded[:, :h, :w] = img
        mask[:h, :w] = 0
        return padded, mask
-
-
def convert_input_format_with_resizepad(batched_inputs):
    """Resize, normalize and pad samples into batched model inputs.

    Args:
        batched_inputs (list[dict]): samples as produced by get_input().

    Returns:
        tuple: (images, img_masks, gt_classes_list, gt_boxes_list,
        gt_valids_list, org_sizes). Boxes are cxcywh normalized by the
        resized (un-padded) size; masks are 0 on valid pixels, 1 on padding;
        org_sizes holds each sample's pre-resize (h, w).
    """
    # NOTE(review): truncates the batch to its first element — presumably a
    # debugging shortcut; kept for behavior compatibility, confirm before
    # relying on multi-sample batches.
    batched_inputs = [batched_inputs[0]]
    images = [x['image'] for x in batched_inputs]
    # ImageNet mean/std, broadcast over CHW.
    pixel_mean = Tensor([123.675, 116.280, 103.530]).view(3, 1, 1)
    pixel_std = Tensor([58.395, 57.120, 57.375]).view(3, 1, 1)
    normalizer = lambda x: (x - pixel_mean) / pixel_std
    # Pre-resize sizes, returned so callers can map predictions back.
    org_sizes = Tensor([[img.shape[1], img.shape[2]] for img in images])

    # targets
    resize = Resize(800, 960)
    pad = Pad(960, 960)
    print('pad size', pad.tgt_w, pad.tgt_h)
    gt_instances = [x["instances"] for x in batched_inputs]
    gt_classes_list = []
    gt_boxes_list = []
    gt_valids_list = []
    new_image_list = []
    mask_list = []
    for image, targets_per_image in zip(images, gt_instances):
        image, targets_per_image['gt_boxes'] = resize(image, targets_per_image['gt_boxes'])
        _, h, w = image.shape
        print('resized', h, w)
        # Normalize boxes by the resized (pre-pad) size, then xyxy -> cxcywh.
        image_size_xyxy = Tensor([w, h, w, h], dtype=ms.float32)
        gt_classes = targets_per_image['gt_classes']
        gt_boxes = box_xyxy_to_cxcywh(targets_per_image['gt_boxes'] / image_size_xyxy)

        # Normalize pixels, then pad the image and build its validity mask.
        image, mask = pad(normalizer(image))
        new_image_list.append(image)
        mask_list.append(mask)
        gt_classes_list.append(gt_classes)
        gt_boxes_list.append(gt_boxes)
        # All instances are valid (no padding of the instance dimension here).
        gt_valids_list.append(ops.ones(len(gt_boxes), ms.bool_))

    images = ops.stack(new_image_list, 0)
    img_masks = ops.stack(mask_list, 0)

    return images, img_masks, gt_classes_list, gt_boxes_list, gt_valids_list, org_sizes
-
-
def convert_input_format(batched_inputs):
    """Normalize and batch variable-size samples without resizing.

    Args:
        batched_inputs (list[dict]): samples as produced by get_input().

    Returns:
        tuple: (images, img_masks, gt_classes_list, gt_boxes_list,
        gt_valids_list). Boxes are cxcywh normalized by each sample's valid
        (un-padded) image_size; masks are 0 on valid pixels, 1 on padding.
    """
    batch_size = len(batched_inputs)

    # images: normalize each one, then pad all to a common batch canvas.
    # ImageNet mean/std, broadcast over CHW.
    pixel_mean = Tensor([123.675, 116.280, 103.530]).view(3, 1, 1)
    pixel_std = Tensor([58.395, 57.120, 57.375]).view(3, 1, 1)
    normalizer = lambda x: (x - pixel_mean) / pixel_std
    images = [normalizer(x["image"]) for x in batched_inputs]
    images, unpad_img_sizes = pad_as_batch(images)

    _, _, h, w = images.shape
    img_masks = ops.ones((batch_size, h, w), images.dtype)
    for img_id in range(batch_size):
        img_h, img_w = batched_inputs[img_id]["instances"]['image_size']
        img_masks[img_id, :img_h, :img_w] = 0  # 0 marks valid pixels

    # targets: normalize boxes by the valid size and convert xyxy -> cxcywh.
    gt_instances = [x["instances"] for x in batched_inputs]
    gt_classes_list = []
    gt_boxes_list = []
    gt_valids_list = []
    for targets_per_image in gt_instances:
        h, w = targets_per_image['image_size']
        image_size_xyxy = Tensor([w, h, w, h], dtype=ms.float32)
        gt_classes = targets_per_image['gt_classes']
        gt_boxes = box_xyxy_to_cxcywh(targets_per_image['gt_boxes'] / image_size_xyxy)

        gt_classes_list.append(gt_classes)
        gt_boxes_list.append(gt_boxes)
        # All instances are valid (no padding of the instance dimension here).
        gt_valids_list.append(ops.ones(len(gt_boxes), ms.bool_))

    return images, img_masks, gt_classes_list, gt_boxes_list, gt_valids_list
-
-
- if __name__ == "__main__":
- # set context
- ms.set_context(mode=ms.PYNATIVE_MODE, device_target='CPU' if is_windows else 'GPU',
- pynative_synchronize=True, device_id=2)
-
- train = True
- infer = False
-
- pth_dir = r"C:\02Data\models" if is_windows else '/data1/zhouwuxing/pretrained_model/'
- pth_path = os.path.join(pth_dir, "dino_r50_4scale_12ep_49_2AP.pth")
- ms_pth_path = os.path.join(pth_dir, "ms_dino_r50_4scale_12ep_49_2AP.ckpt")
-
- dino = build_dino(unit_test=True)
-
- # # set mix precision
- # dino.to_float(ms.float16)
- # for _, cell in dino.cells_and_names():
- # if isinstance(cell, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, HungarianMatcher)):
- # cell.to_float(ms.float32)
-
- ms.load_checkpoint(ms_pth_path, dino)
-
- inputs, _ = get_input()
- images, img_masks, gt_classes_list, gt_boxes_list, gt_valids_list = convert_input_format(inputs)
- inputs = images, img_masks, gt_boxes_list, gt_classes_list, gt_valids_list
- if infer:
- dino.set_train(False)
- inf_result = dino(inputs)
- print('batch size', len(inf_result))
- for r in inf_result:
- r = r['instances']
- print("image size", r['image_size'])
- print("box shape", r['pred_boxes'].shape)
- print("score shape", r['scores'].shape)
- print("class shape", r['pred_classes'].shape)
-
- if train:
- # train
- dino.set_train(True)
-
- def forward(*_inputs):
- loss_value = dino(*_inputs)
- return loss_value
-
- weight = dino.trainable_params()
- optimizer = nn.SGD(weight, learning_rate=1e-3)
- # optimizer = nn.AdamWeightDecay(weight, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-6, weight_decay=1e-4)
-
- grad_fn = value_and_grad(forward, grad_position=None, weights=weight)
-
- show_grad_weight = False
- for k in range(1):
- status = init_status()
- loss, gradients = grad_fn(*inputs)
- is_finite = all_finite(gradients, status)
- print(f'loss of the {k} step', loss, f'is_finite: {is_finite}')
-
- if show_grad_weight:
- for i, grad in enumerate(gradients):
- name = weight[i].name
- if not name.startswith('neck.convs.2.norm.gamma'):
- continue
- print(name, grad.shape, grad.mean(), grad.reshape(-1)[:3],
- weight[i].data.mean(), weight[i].data.reshape(-1)[:3])
- optimizer(gradients)
-
- # train one step
- pass
|