|
- import copy
- import logging
-
- import numpy as np
- import torch
- from detectron2.data import detection_utils as utils
- from fvcore.common.file_io import PathManager
- from PIL import Image
- from .transforms.arguement import arguementation
-
- from . import transforms as T
-
- """
- This file contains the default mapping that's applied to "dataset dicts".
- """
-
# Public API of this module: only the mapper class is exported.
__all__ = ["DatasetMapper"]
-
-
def build_transform_gen(cfg, is_train):
    """
    Create a list of :class:`TransformGen` from config.

    The list starts with an optional resize step and is followed by every
    entry of ``cfg.MODEL.CENTERNET.TRAIN_PIPELINES`` (training) or
    ``cfg.MODEL.CENTERNET.TEST_PIPELINES`` (testing).

    Args:
        cfg: config node. Reads ``INPUT.MIN_SIZE_*`` / ``INPUT.MAX_SIZE_*``,
            ``INPUT.MIN_SIZE_TRAIN_SAMPLING`` (training only) and, under
            ``MODEL.CENTERNET``, ``RESIZE_TYPE`` plus the pipeline lists.
        is_train (bool): selects the train or test settings.

    Returns:
        list[TransformGen]
    """
    logger = logging.getLogger("detectron2")

    if is_train:
        min_size = cfg.INPUT.MIN_SIZE_TRAIN
        max_size = cfg.INPUT.MAX_SIZE_TRAIN
        sample_style = cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING
    else:
        min_size = cfg.INPUT.MIN_SIZE_TEST
        max_size = cfg.INPUT.MAX_SIZE_TEST
        sample_style = "choice"

    centernet_cfg = cfg.MODEL.CENTERNET

    # Start from "no resize" so the list is always bound: a max_size of 1
    # disables resizing, and an unrecognised RESIZE_TYPE adds no resize step
    # instead of failing later with an unbound local.
    tfm_gens = []
    if max_size != 1:
        resize_type = centernet_cfg.RESIZE_TYPE
        if resize_type == "ResizeShortestEdge":
            tfm_gens.append(T.ResizeShortestEdge(min_size, max_size, sample_style))
        elif resize_type == "Resize":
            # min_size may be a sequence (first entry is used) or a scalar.
            try:
                short_side = min_size[0]
            except (TypeError, IndexError):
                short_side = int(min_size)
            tfm_gens.append(T.Resize(shape=(short_side, max_size)))

    # Each pipeline entry is an (augmentation name, kwargs) pair; the name is
    # looked up on the transforms module ``T``.
    pipelines = centernet_cfg.TRAIN_PIPELINES if is_train else centernet_cfg.TEST_PIPELINES
    for aug, args in pipelines:
        tfm_gens.append(getattr(T, aug)(**args))

    logger.info("TransformGens used: %s", tfm_gens)

    return tfm_gens
-
-
def check_sample_valid(args):
    """
    Validate the ``min_size`` entry of a sampling-argument dict.

    Args:
        args (dict): must contain "sample_style"; when that is "range" it
            must also contain "min_size" with exactly two entries.

    Raises:
        AssertionError: if "sample_style" is "range" and "min_size" does not
            have exactly two entries.
    """
    if args["sample_style"] != "range":
        return

    num_sizes = len(args["min_size"])
    if num_sizes != 2:
        # Raised explicitly (rather than via ``assert``) so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        raise AssertionError(
            f"exactly 2 min_size values are required for ranges, got {num_sizes}"
        )
-
-
class DatasetMapper:
    """
    A callable which takes a dataset dict in centernet Dataset format,
    and maps it into a format used by the model.

    This is the default callable to be used to map your dataset dict into training data.
    You may need to follow it to implement your own one for customized logic.

    The callable currently does the following:

    1. Read the image from "file_name"
    2. Applies cropping/geometric transforms to the image and annotations
    3. Prepare data and annotations to Tensor and :class:`Instances`
    """

    def __init__(self, cfg, is_train=True):
        """
        Args:
            cfg: config node; the fields read are visible in the body below.
            is_train (bool): whether the mapper is used for training. Cropping
                and keypoint flip indices are only set up when this is True.
        """
        if cfg.INPUT.CROP.ENABLED and is_train:
            self.crop_gen = T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE)
            logging.getLogger("detectron2").info(
                "CropGen used in training: " + str(self.crop_gen)
            )
        else:
            self.crop_gen = None

        # When set, annotations are kept (and converted) at test time too.
        self.eval_with_gt = cfg.TEST.get("WITH_GT", False)

        self.tfm_gens = build_transform_gen(cfg, is_train)

        # fmt: off
        self.img_format     = cfg.INPUT.FORMAT
        self.mask_on        = cfg.MODEL.MASK_ON
        self.mask_format    = cfg.INPUT.MASK_FORMAT
        self.keypoint_on    = cfg.MODEL.KEYPOINT_ON
        self.load_proposals = cfg.MODEL.LOAD_PROPOSALS
        # fmt: on
        if self.keypoint_on and is_train:
            # Flip only makes sense in training
            self.keypoint_hflip_indices = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN)
        else:
            self.keypoint_hflip_indices = None

        # These two attributes exist only when proposals are loaded; they are
        # read only under the same ``self.load_proposals`` condition.
        if self.load_proposals:
            self.min_box_side_len = cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE
            self.proposal_topk = (
                cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN
                if is_train
                else cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST
            )
        self.is_train = is_train
        self.imgaug_prob = cfg.MODEL.CENTERNET.IMGAUG_PROB
        self.kd_without_label = cfg.MODEL.CENTERNET.KD.KD_WITHOUT_LABEL
        self.BOX_MINSIZE = cfg.MODEL.CENTERNET.BOX_MINSIZE

    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in centernet Dataset format.

        Returns:
            dict: a format that builtin models in centernet accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        image, transforms = self._apply_geometric_transforms(image, dataset_dict)
        image_shape = image.shape[:2]  # h, w

        # apply imgaug
        if self.is_train and self.imgaug_prob < 1.0:
            image = arguementation(image, self.imgaug_prob)

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
        # Can use uint8 if it turns out to be slow some day

        # USER: Remove if you don't use pre-computed proposals.
        if self.load_proposals:
            utils.transform_proposals(
                dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk
            )

        if not self.is_train and not self.eval_with_gt:
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict and not self.kd_without_label:
            dataset_dict["instances"] = self._build_instances(
                dataset_dict.pop("annotations"), transforms, image_shape
            )

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            dataset_dict["sem_seg"] = self._load_sem_seg(
                dataset_dict.pop("sem_seg_file_name"), transforms
            )
        return dataset_dict

    def _apply_geometric_transforms(self, image, dataset_dict):
        """Crop (if enabled) and run ``self.tfm_gens``; return (image, transforms)."""
        annotations = dataset_dict.get("annotations")

        # Without usable annotations there is no instance to crop around, so
        # the crop generator (if any) is applied like any other transform.
        # An empty annotation list takes this path too: np.random.choice
        # raises ValueError on an empty sequence.
        if self.kd_without_label or not annotations:
            crop_gens = [self.crop_gen] if self.crop_gen else []
            return T.apply_transform_gens(crop_gens + self.tfm_gens, image)

        if not self.crop_gen:
            return T.apply_transform_gens(self.tfm_gens, image)

        # Crop around an instance if there are instances in the image.
        # USER: Remove if you don't use cropping
        crop_tfm = utils.gen_crop_transform_with_instance(
            self.crop_gen.get_crop_size(image.shape[:2]),
            image.shape[:2],
            np.random.choice(annotations),
        )
        image = crop_tfm.apply_image(image)
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        # The crop was applied first, so it goes first in the combined transform.
        return image, crop_tfm + transforms

    def _build_instances(self, annotations, transforms, image_shape):
        """Transform the non-crowd annotations and pack them into filtered instances."""
        # USER: Modify this if you want to keep them for some reason.
        for anno in annotations:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
            )
            for obj in annotations
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.mask_format
        )
        # Create a tight bounding box from masks, useful when image is cropped
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        return utils.filter_empty_instances(instances, box_threshold=self.BOX_MINSIZE)

    @staticmethod
    def _load_sem_seg(file_name, transforms):
        """Read a segmentation map from ``file_name``, transform it, return a long tensor."""
        with PathManager.open(file_name, "rb") as f:
            # Convert to an array while the file is still open.
            sem_seg_gt = np.asarray(Image.open(f), dtype="uint8")
        sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
        return torch.as_tensor(sem_seg_gt.astype("long"))
|