|
- # Copyright 2021 Huawei Technologies Co., Ltd
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ============================================================================
- """
- Data operations, will be used in train.py
- """
-
- import os
- import sys
- import math
- import cv2
- import numpy as np
- import pycocotools.coco as coco
- import mindspore.dataset as ds
- from mindspore import log as logger
- from mindspore.mindrecord import FileWriter
-
- try:
- from src.model_utils.config import config, dataset_config
- from src.model_utils.moxing_adapter import moxing_wrapper
- from src.image import color_aug, get_affine_transform, affine_transform
- from src.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian, draw_dense_reg
- from src.visual import visual_image
- except ImportError as import_error:
- print('Import Error: {}, trying append path/centernet_det/src/../'.format(import_error))
- sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
- from src.model_utils.config import config, dataset_config
- from src.model_utils.moxing_adapter import moxing_wrapper
- from src.image import color_aug, get_affine_transform, affine_transform
- from src.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian, draw_dense_reg
- from src.visual import visual_image
-
-
- _current_dir = os.path.dirname(os.path.realpath(__file__))
- cv2.setNumThreads(0)
-
-
- class COCOHP(ds.Dataset):
- """
- Encapsulation class of COCO datast.
- Initialize and preprocess of image for training and testing.
-
- Args:
- data_dir(str): Path of coco dataset.
- data_opt(edict): Config info for coco dataset.
- net_opt(edict): Config info for CenterNet.
- run_mode(str): Training or testing.
-
- Returns:
- Prepocessed training or testing dataset for CenterNet network.
- """
- def __init__(self, data_opt, run_mode="train", net_opt=None, enable_visual_image=False, save_path=None):
- self._data_rng = np.random.RandomState(123)
- self.data_opt = data_opt
- self.data_opt.mean = self.data_opt.mean.reshape(1, 1, 3)
- self.data_opt.std = self.data_opt.std.reshape(1, 1, 3)
- self.pad = 127
- assert run_mode in ["train", "test", "val"], "only train/test/val mode are supported"
- self.run_mode = run_mode
-
- if net_opt is not None:
- self.net_opt = net_opt
- self.enable_visual_image = enable_visual_image
- if self.enable_visual_image:
- self.save_path = os.path.join(save_path, self.run_mode, "input_image")
- if not os.path.exists(self.save_path):
- os.makedirs(self.save_path)
-
- def init(self, data_dir, keep_res=False):
- """initialize additional info"""
- logger.info('Initializing coco 2017 {} data.'.format(self.run_mode))
- if not os.path.isdir(data_dir):
- raise RuntimeError("Invalid dataset path")
- if self.run_mode != "test":
- self.annot_path = os.path.join(data_dir, 'annotations',
- 'instances_{}2017.json').format(self.run_mode)
- else:
- self.annot_path = os.path.join(data_dir, 'annotations', 'image_info_test-dev2017.json')
- self.image_path = os.path.join(data_dir, '{}2017').format(self.run_mode)
- logger.info('Image path: {}'.format(self.image_path))
- logger.info('Annotations: {}'.format(self.annot_path))
-
- self.coco = coco.COCO(self.annot_path)
- image_ids = self.coco.getImgIds()
-
- self.train_cls = self.data_opt.coco_classes
- self.train_cls_dict = {}
- for i, cls in enumerate(self.train_cls):
- self.train_cls_dict[cls] = i
-
- self.classs_dict = {}
- cat_ids = self.coco.loadCats(self.coco.getCatIds())
- for cat in cat_ids:
- self.classs_dict[cat["id"]] = cat["name"]
-
- if self.run_mode != "test":
- self.images = []
- self.anns = {}
- for img_id in image_ids:
- idxs = self.coco.getAnnIds(imgIds=[img_id])
- if idxs:
- self.images.append(img_id)
- self.anns[img_id] = idxs
- else:
- self.images = image_ids
- self.num_samples = len(self.images)
- self.keep_res = keep_res
- logger.info('Loaded {} {} samples'.format(self.run_mode, self.num_samples))
-
- def __len__(self):
- return self.num_samples
-
- def _coco_box_to_bbox(self, box):
- bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]], dtype=np.float32)
- return bbox
-
- def transfer_coco_to_mindrecord(self, mindrecord_dir, file_name="coco_det.train.mind", shard_num=1):
- """Create MindRecord file by image_dir and anno_path."""
- if not os.path.isdir(mindrecord_dir):
- os.makedirs(mindrecord_dir)
- if os.path.isdir(self.image_path) and os.path.exists(self.annot_path):
- logger.info("Create MindRecord based on COCO_HP dataset")
- else:
- raise ValueError('data_dir {} or anno_path {} does not exist'.format(self.image_path, self.annot_path))
-
- mindrecord_path = os.path.join(mindrecord_dir, file_name)
- writer = FileWriter(mindrecord_path, shard_num)
-
- centernet_json = {
- "img_id": {"type": "int32", "shape": [1]},
- "image": {"type": "bytes"},
- "num_objects": {"type": "int32"},
- "bboxes": {"type": "float32", "shape": [-1, 4]},
- "category_id": {"type": "int32", "shape": [-1]},
- }
-
- writer.add_schema(centernet_json, "centernet_json")
-
- for img_id in self.images:
- image_info = self.coco.loadImgs([img_id])
- annos = self.coco.loadAnns(self.anns[img_id])
- # get image
- img_name = image_info[0]['file_name']
- img_name = os.path.join(self.image_path, img_name)
- with open(img_name, 'rb') as f:
- image = f.read()
-
- bboxes = []
- category_id = []
- num_objects = len(annos)
- for anno in annos:
- bbox = self._coco_box_to_bbox(anno['bbox'])
- class_name = self.classs_dict[anno["category_id"]]
- if class_name in self.train_cls:
- x_min, x_max = bbox[0], bbox[2]
- y_min, y_max = bbox[1], bbox[3]
- bboxes.append([x_min, y_min, x_max, y_max])
- category_id.append(self.train_cls_dict[class_name])
-
- row = {"img_id": np.array([img_id], dtype=np.int32),
- "image": image,
- "num_objects": num_objects,
- "bboxes": np.array(bboxes, np.float32),
- "category_id": np.array(category_id, np.int32)}
- writer.write_raw_data([row])
-
- writer.commit()
- logger.info("Create Mindrecord Done, at {}".format(mindrecord_dir))
-
- def _get_border(self, border, size):
- i = 1
- while size - border // i <= border // i:
- i *= 2
- return border // i
-
- def __getitem__(self, index):
- img_id = self.images[index]
- file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
- img_path = os.path.join(self.image_path, file_name)
- img = cv2.imread(img_path)
- image_id = np.array([img_id], dtype=np.int32).reshape((-1))
- ret = (img, image_id)
- return ret
-
- def pre_process_for_test(self, image, img_id, scale):
- """image pre-process for evaluation"""
- b, h, w, ch = image.shape
- assert b == 1, "only single image was supported here"
- image = image.reshape((h, w, ch))
- height, width = image.shape[0:2]
- new_height = int(height * scale)
- new_width = int(width * scale)
- if self.keep_res:
- inp_height = (new_height | self.pad) + 1
- inp_width = (new_width | self.pad) + 1
- c = np.array([new_width // 2, new_height // 2], dtype=np.float32)
- s = np.array([inp_width, inp_height], dtype=np.float32)
- else:
- inp_height, inp_width = self.data_opt.input_res[0], self.data_opt.input_res[1]
- c = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
- s = max(height, width) * 1.0
-
- trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height])
- resized_image = cv2.resize(image, (new_width, new_height))
- inp_image = cv2.warpAffine(resized_image, trans_input, (inp_width, inp_height),
- flags=cv2.INTER_LINEAR)
- inp_img = (inp_image.astype(np.float32) / 255. - self.data_opt.mean) / self.data_opt.std
-
- eval_image = inp_img.reshape((1,) + inp_img.shape)
- eval_image = eval_image.transpose(0, 3, 1, 2)
-
- meta = {'c': c, 's': s,
- 'out_height': inp_height // self.net_opt.down_ratio,
- 'out_width': inp_width // self.net_opt.down_ratio}
-
- if self.enable_visual_image:
- if self.run_mode != "test":
- annos = self.coco.loadAnns(self.anns[img_id])
- num_objs = min(len(annos), self.data_opt.max_objs)
- ground_truth = []
- for k in range(num_objs):
- ann = annos[k]
- bbox = self._coco_box_to_bbox(ann['bbox']) * scale
- cls_id = int(ann['category_id']) - 1
- bbox[:2] = affine_transform(bbox[:2], trans_input)
- bbox[2:] = affine_transform(bbox[2:], trans_input)
- bbox[0::2] = np.clip(bbox[0::2], 0, inp_width - 1)
- bbox[1::3] = np.clip(bbox[1::3], 0, inp_height - 1)
- h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
- if h <= 0 or w <= 0:
- continue
- bbox = [bbox[0], bbox[1], w, h]
- gt = {
- "image_id": int(img_id),
- "category_id": int(cls_id + 1),
- "bbox": bbox,
- "score": float("{:.2f}".format(1)),
- "id": self.anns[img_id][k]
- }
- ground_truth.append(gt)
- visual_image(inp_image, ground_truth, self.save_path, height=inp_height, width=inp_width,
- name="_scale" + str(scale))
- else:
- image_name = "gt_" + self.run_mode + "_image_" + str(img_id) + "_scale_" + str(scale) + ".png"
- cv2.imwrite("{}/{}".format(self.save_path, image_name), inp_image)
-
- return eval_image, meta
-
- def preprocess_fn(self, image, num_objects, bboxes, category_id):
- """image pre-process and augmentation"""
- num_objs = min(num_objects, self.data_opt.max_objs)
- img = cv2.imdecode(image, cv2.IMREAD_COLOR)
- height = img.shape[0]
- width = img.shape[1]
- c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
- s = max(height, width) * 1.0
- input_h, input_w = self.data_opt.input_res[0], self.data_opt.input_res[1]
- rot = 0
-
- flipped = False
- if self.data_opt.rand_crop:
- s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
- h_border = self._get_border(128, img.shape[0])
- w_border = self._get_border(128, img.shape[1])
- c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
- c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
- else:
- sf = self.data_opt.scale
- cf = self.data_opt.shift
- c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
- c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
- s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
-
- if np.random.random() < self.data_opt.flip_prop:
- flipped = True
- img = img[:, ::-1, :]
- c[0] = width - c[0] - 1
-
- trans_input = get_affine_transform(c, s, rot, [input_w, input_h])
- inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
- flags=cv2.INTER_LINEAR)
- inp = (inp.astype(np.float32) / 255.)
- if self.run_mode == "train" and self.data_opt.color_aug:
- color_aug(self._data_rng, inp, self.data_opt.eig_val, self.data_opt.eig_vec)
-
- if self.data_opt.output_res[0] != self.data_opt.output_res[1]:
- raise ValueError("Only square image was supported to used as output for convenient")
-
- output_h = input_h // self.data_opt.down_ratio
- output_w = input_w // self.data_opt.down_ratio
- max_objs = self.data_opt.max_objs
- num_classes = self.data_opt.num_classes
- trans_output_rot = get_affine_transform(c, s, rot, [output_w, output_h])
-
- hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
- wh = np.zeros((max_objs, 2), dtype=np.float32)
- dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
- reg = np.zeros((max_objs, 2), dtype=np.float32)
- ind = np.zeros((max_objs), dtype=np.int32)
- reg_mask = np.zeros((max_objs), dtype=np.int32)
- cat_spec_wh = np.zeros((max_objs, num_classes * 2), dtype=np.float32)
- cat_spec_mask = np.zeros((max_objs, num_classes * 2), dtype=np.int32)
-
- draw_gaussian = draw_msra_gaussian if self.net_opt.mse_loss else draw_umich_gaussian
-
- ground_truth = []
- for k in range(num_objs):
- bbox = bboxes[k]
- cls_id = category_id[k] - 1
- if flipped:
- bbox[[0, 2]] = width - bbox[[2, 0]] - 1 # index begin from zero
- bbox[:2] = affine_transform(bbox[:2], trans_output_rot)
- bbox[2:] = affine_transform(bbox[2:], trans_output_rot)
- bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
- bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
- h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
- if h <= 0 and w <= 0:
- continue
- radius = gaussian_radius((math.ceil(h), math.ceil(w)))
- radius = max(0, int(radius))
- ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
- ct_int = ct.astype(np.int32)
- draw_gaussian(hm[cls_id], ct_int, radius)
- wh[k] = 1. * w, 1. * h
- ind[k] = ct_int[1] * output_w + ct_int[0]
- reg[k] = ct - ct_int
- reg_mask[k] = 1
- cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
- cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
-
- if self.net_opt.dense_wh:
- draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
- ground_truth.append([ct[0] - w / 2, ct[1] - h / 2,
- ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])
- ret = (inp, hm, reg_mask, ind, wh)
- if self.net_opt.dense_wh:
- hm_a = hm.max(axis=0)
- dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
- ret += (dense_wh, dense_wh_mask)
- elif self.net_opt.cat_spec_wh:
- ret += (cat_spec_wh, cat_spec_mask)
- if self.net_opt.reg_offset:
- ret += (reg,)
- return ret
-
- def create_train_dataset(self, mindrecord_dir, prefix="coco_det.train.mind", batch_size=1,
- device_num=1, rank=0, num_parallel_workers=1, do_shuffle=True):
- """create train dataset based on mindrecord file"""
- if not os.path.isdir(mindrecord_dir):
- raise ValueError('MindRecord data_dir {} does not exist'.format(mindrecord_dir))
-
- files = os.listdir(mindrecord_dir)
- data_files = []
- for file_name in files:
- if prefix in file_name and "db" not in file_name:
- data_files.append(os.path.join(mindrecord_dir, file_name))
- if not data_files:
- raise ValueError('data_dir {} have no data files'.format(mindrecord_dir))
-
- columns = ["img_id", "image", "num_objects", "bboxes", "category_id"]
- data_set = ds.MindDataset(data_files,
- columns_list=columns,
- num_parallel_workers=num_parallel_workers, shuffle=do_shuffle,
- num_shards=device_num, shard_id=rank)
- ori_dataset_size = data_set.get_dataset_size()
- logger.info('origin dataset size: {}'.format(ori_dataset_size))
-
- data_set = data_set.map(operations=self.preprocess_fn,
- input_columns=["image", "num_objects", "bboxes", "category_id"],
- output_columns=["image", "hm", "reg_mask", "ind", "wh", "reg"],
- column_order=["image", "hm", "reg_mask", "ind", "wh", "reg"],
- num_parallel_workers=num_parallel_workers,
- python_multiprocessing=True)
- data_set = data_set.batch(batch_size, drop_remainder=True, num_parallel_workers=8)
- logger.info("data size: {}".format(data_set.get_dataset_size()))
- logger.info("repeat count: {}".format(data_set.get_repeat_count()))
- return data_set
-
- def create_eval_dataset(self, batch_size=1, num_parallel_workers=1):
- """create testing dataset based on coco format"""
-
- def generator():
- for i in range(self.num_samples):
- yield self.__getitem__(i)
-
- column = ["image", "image_id"]
- data_set = ds.GeneratorDataset(generator, column, num_parallel_workers=num_parallel_workers)
- data_set = data_set.batch(batch_size, drop_remainder=True, num_parallel_workers=8)
- return data_set
-
-
- def modelarts_pre_process():
- """modelarts pre process function."""
- config.coco_data_dir = config.data_path
- config.mindrecord_dir = config.output_path
-
-
- @moxing_wrapper(pre_process=modelarts_pre_process)
- def coco2mindrecord():
- """Convert coco2017 dataset to mindrecord"""
- dsc = COCOHP(dataset_config, run_mode="train")
- dsc.init(config.coco_data_dir)
- dsc.transfer_coco_to_mindrecord(config.mindrecord_dir, config.mindrecord_prefix, shard_num=8)
-
-
- if __name__ == '__main__':
- coco2mindrecord()
|