# jingmohan / mindface_retinaface_resnet50
# forked from lemon/mindface_retinaface_resnet50
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Augmentation."""
import random
import copy
import cv2
import numpy as np

def _rand(a=0., b=1.):
    """Return one random float obtained by scaling a unit sample to span a..b.

    The unit sample comes from ``np.random.rand()``, so the result depends on
    (and advances) NumPy's global random state.
    """
    span = b - a
    return np.random.rand() * span + a

def bbox_iof(bbox_a, bbox_b, offset=0):
    """Compute the pairwise intersection-over-foreground of two box sets.

    For every pair (a, b) the result is ``area(a ∩ b) / area(a)``.

    Parameters
    ----------
    bbox_a : numpy.ndarray
        2-D array with N rows whose first four columns are read as
        (x_min, y_min, x_max, y_max).
    bbox_b : numpy.ndarray
        2-D array with M rows in the same column layout.
    offset : float or int
        Value added to every width/height before areas are multiplied out.

    Returns
    -------
    numpy.ndarray
        Array of shape (N, M) holding the ratio for every pair.

    Raises
    ------
    IndexError
        If either input has fewer than four columns.
    """
    if not (bbox_a.shape[1] >= 4 and bbox_b.shape[1] >= 4):
        raise IndexError("Bounding boxes axis 1 must have at least length 4")

    a_min, a_max = bbox_a[:, 0:2], bbox_a[:, 2:4]
    b_min, b_max = bbox_b[:, 0:2], bbox_b[:, 2:4]

    # Insert an axis on the A side so every A row is compared with every B row.
    inter_min = np.maximum(a_min[:, None], b_min)
    inter_max = np.minimum(a_max[:, None], b_max)

    # Pairs that do not strictly overlap on both axes contribute zero area.
    overlaps = (inter_min < inter_max).all(axis=2)
    inter_area = np.prod(inter_max - inter_min + offset, axis=2) * overlaps

    area_a = np.prod(a_max - a_min + offset, axis=1)
    # Denominators below 1 are replaced by 1.
    return inter_area / np.maximum(area_a[:, None], 1)

def _is_iof_satisfied_constraint(box, crop_box):
    """Report whether any (box, crop) pair reaches an IoF of at least 1.0.

    ``box`` and ``crop_box`` are forwarded unchanged to ``bbox_iof``; the
    result is true as soon as one entry of the returned matrix is >= 1.0.
    """
    return np.any(bbox_iof(box, crop_box) >= 1.0)

def _choose_candidate(max_trial, image_w, image_h, boxes):
    """Sample square crop windows that satisfy the IoF constraint.

    Parameters
    ----------
    max_trial : int
        Maximum number of random windows to try.
    image_w, image_h : int
        Size of the source image.
    boxes : numpy.ndarray
        Ground-truth boxes, one per row, passed to the IoF check.

    Returns
    -------
    list of tuple
        ``(dx, dy, nw, nh)`` windows. The first entry is always the
        un-cropped image ``(0, 0, image_w, image_h)``; at most two sampled
        windows follow it.

    Raises
    ------
    Exception
        If a trial is run while ``boxes`` has no rows.
    """
    # The full image is always available as the fallback candidate.
    crops = [(0, 0, image_w, image_h)]
    short_side = min(image_w, image_h)

    for _ in range(max_trial):
        # The first draw decides whether to shrink; only then is a scale drawn.
        scale = _rand(0.3, 1.0) if _rand() > 0.2 else 1.0

        side = int(scale * short_side)
        left = int(_rand(0, image_w - side))
        top = int(_rand(0, image_h - side))

        if boxes.shape[0] <= 0:
            raise Exception("!!! annotation box is less than 1")

        window = np.array((left, top, left + side, top + side))
        if _is_iof_satisfied_constraint(boxes, window[np.newaxis]):
            crops.append((left, top, side, side))
            # Stop once two sampled windows sit next to the default one.
            if len(crops) >= 3:
                break

    return crops

def _correct_bbox_by_candidates(candidates, input_w, input_h, flip,
                                boxes, labels, landms, allow_outside_center):
    """Map the annotations into a randomly chosen crop window.

    Candidates are popped from ``candidates`` (the list is modified in place)
    until one leaves at least one valid box after cropping.

    Parameters
    ----------
    candidates : list of tuple
        ``(dx, dy, nw, nh)`` crop windows.
    input_w, input_h : int
        Size of the network input the crop is resized to.
    flip : bool
        Whether to mirror the annotations horizontally.
    boxes : numpy.ndarray
        Boxes whose columns 0-3 are read as (x_min, y_min, x_max, y_max).
    labels : numpy.ndarray
        One label per box (1-D).
    landms : numpy.ndarray
        Landmarks, reshaped internally to (-1, 5, 2) as five (x, y) points.
    allow_outside_center : bool
        When false, boxes whose centre falls outside the resized crop are
        dropped.

    Returns
    -------
    tuple
        ``(targets, candidate)``: ``targets`` stacks normalised boxes, the
        landmarks flattened to 10 columns and the labels column-wise;
        ``candidate`` is the window that was used.

    Raises
    ------
    Exception
        If no candidate keeps at least one valid box.
    """
    x_cols, y_cols = [0, 2], [1, 3]

    while candidates:
        # While several candidates remain, never draw index 0 (the caller
        # places the non-cropping default there); it is only used last.
        first = 1 if len(candidates) > 1 else 0
        candidate = candidates.pop(np.random.randint(first, len(candidates)))
        dx, dy, nw, nh = candidate

        # Work on copies so the caller's arrays stay untouched across retries.
        cur_boxes = copy.deepcopy(boxes)
        cur_labels = copy.deepcopy(labels)
        cur_landms = copy.deepcopy(landms).reshape([-1, 5, 2])

        # The longer crop side is mapped onto input_w.
        scale = float(input_w) / float(max(nw, nh))

        cur_boxes[:, x_cols] = (cur_boxes[:, x_cols] - dx) * scale
        cur_boxes[:, y_cols] = (cur_boxes[:, y_cols] - dy) * scale
        cur_landms[:, :, 0] = (cur_landms[:, :, 0] - dx) * scale
        cur_landms[:, :, 1] = (cur_landms[:, :, 1] - dy) * scale

        if flip:
            # Mirroring swaps the roles of x_min and x_max.
            cur_boxes[:, x_cols] = input_w - cur_boxes[:, [2, 0]]
            cur_landms[:, :, 0] = input_w - cur_landms[:, :, 0]
            # Exchange landmark points 0<->1 and 3<->4 after mirroring.
            cur_landms[:, [0, 1, 3, 4], :] = cur_landms[:, [1, 0, 4, 3], :]

        if not allow_outside_center:
            center_x = (cur_boxes[:, 0] + cur_boxes[:, 2]) / 2.
            center_y = (cur_boxes[:, 1] + cur_boxes[:, 3]) / 2.
            inside = ((center_x >= 0.) & (center_y >= 0.)
                      & (center_x <= input_w) & (center_y <= input_h))
            cur_boxes = cur_boxes[inside]
            cur_landms = cur_landms[inside]
            cur_labels = cur_labels[inside]

        # Clamp boxes to the input area: negative top-left coordinates become
        # zero, bottom-right coordinates are capped at the input size.
        top_left = cur_boxes[:, 0:2]
        top_left[top_left < 0] = 0
        right = cur_boxes[:, 2]
        right[right > input_w] = input_w
        bottom = cur_boxes[:, 3]
        bottom[bottom > input_h] = input_h

        # Drop boxes whose width or height is not larger than one pixel.
        big_enough = np.logical_and(cur_boxes[:, 2] - cur_boxes[:, 0] > 1,
                                    cur_boxes[:, 3] - cur_boxes[:, 1] > 1)
        cur_boxes = cur_boxes[big_enough]
        cur_landms = cur_landms[big_enough]
        cur_labels = cur_labels[big_enough]

        # Normalise coordinates by the input size.
        cur_boxes[:, x_cols] /= input_w
        cur_boxes[:, y_cols] /= input_h
        cur_landms[:, :, 0] /= input_w
        cur_landms[:, :, 1] /= input_h

        flat_landms = cur_landms.reshape([-1, 10])
        label_col = np.expand_dims(cur_labels, 1)
        targets = np.hstack((cur_boxes, flat_landms, label_col))

        if cur_boxes.shape[0] > 0:
            return targets, candidate

    raise Exception('all candidates can not satisfied re-correct bbox')

def get_interp_method(interp, sizes=()):
    """Resolve an interpolation selector to a concrete code in 0..4.

    Parameters
    ----------
    interp : int
        Interpolation selector:

        0-4: returned unchanged.
        9: automatic choice based on ``sizes`` (see Returns).
        10: a code drawn uniformly at random from 0..4.

        NOTE(review): the earlier documentation of this function named the
        codes 0=nearest, 1=bilinear, 2=bicubic, 3=nearest (area substitute),
        4=Lanczos, whereas ``cv_image_reshape`` in this file maps them to
        cv2.INTER_LINEAR, INTER_CUBIC, INTER_AREA, INTER_NEAREST and
        INTER_LANCZOS4 respectively. Confirm which naming is intended.
    sizes : tuple of int
        ``(old_height, old_width, new_height, new_width)``; only consulted
        when ``interp`` is 9. May be empty.

    Returns
    -------
    int
        A code from 0 to 4. For ``interp == 9``: 2 when ``sizes`` is empty or
        both dimensions grow, 0 when both dimensions shrink, 1 otherwise.

    Raises
    ------
    ValueError
        If ``interp`` is not one of 0, 1, 2, 3, 4, 9, 10.
    """
    if interp == 9:
        if not sizes:
            return 2
        assert len(sizes) == 4
        old_h, old_w, new_h, new_w = sizes
        enlarging = new_h > old_h and new_w > old_w
        shrinking = new_h < old_h and new_w < old_w
        if enlarging:
            return 2
        return 0 if shrinking else 1

    if interp == 10:
        return random.randint(0, 4)

    if interp in (0, 1, 2, 3, 4):
        return interp
    raise ValueError(f'Unknown interp method {interp}')

def cv_image_reshape(interp):
    """Translate an interpolation code (0-4) into an OpenCV interpolation flag.

    Codes map, in order, to ``cv2.INTER_LINEAR``, ``cv2.INTER_CUBIC``,
    ``cv2.INTER_AREA``, ``cv2.INTER_NEAREST`` and ``cv2.INTER_LANCZOS4``.
    Any other code raises ``KeyError``.
    """
    flags_in_code_order = (
        cv2.INTER_LINEAR,
        cv2.INTER_CUBIC,
        cv2.INTER_AREA,
        cv2.INTER_NEAREST,
        cv2.INTER_LANCZOS4,
    )
    code_to_flag = dict(enumerate(flags_in_code_order))
    return code_to_flag[interp]

def color_convert(image, a=1, b=0):
    """Apply ``image * a + b`` in place, limiting the result to [0, 255].

    The arithmetic is carried out in floating point and the limited result is
    written back into ``image`` itself (cast to its dtype), so array views
    passed in are updated too. Nothing is returned.

    Parameters
    ----------
    image : numpy.ndarray
        Array to modify in place.
    a : float
        Multiplicative factor.
    b : float
        Additive offset.
    """
    scaled = image.astype(float) * a + b
    np.clip(scaled, 0, 255, out=scaled)
    image[:] = scaled

def color_distortion(image):
    """Return a randomly colour-jittered copy of ``image``.

    The input is deep-copied first, so the caller's array is not modified.
    Each of the following steps is applied with its own random gate:

    * additive offset drawn from -32..32,
    * multiplicative factor drawn from 0.5..1.5,
    * scaling of HSV channel 1 by a factor drawn from 0.5..1.5,
    * shift of HSV channel 0 by an integer in [-18, 18], wrapped modulo 180.

    A first random draw picks one of two orderings: the multiplicative step
    either runs before the HSV round trip or after it.

    NOTE(review): the BGR<->HSV conversion codes imply a 3-channel BGR image,
    presumably uint8 - confirm against the caller.
    """
    distorted = copy.deepcopy(image)

    def _shift_hue(hsv_image):
        """Add one random integer offset to channel 0 and wrap it into 0..179."""
        hue = hsv_image[:, :, 0].astype(int) + random.randint(-18, 18)
        hue %= 180
        hsv_image[:, :, 0] = hue

    if _rand() > 0.5:
        # Ordering A: offset, factor, then the HSV adjustments.
        if _rand() > 0.5:
            color_convert(distorted, b=_rand(-32, 32))
        if _rand() > 0.5:
            color_convert(distorted, a=_rand(0.5, 1.5))
        hsv = cv2.cvtColor(distorted, cv2.COLOR_BGR2HSV)
        if _rand() > 0.5:
            color_convert(hsv[:, :, 1], a=_rand(0.5, 1.5))
        if _rand() > 0.5:
            _shift_hue(hsv)
        return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

    # Ordering B: offset, the HSV adjustments, then the factor.
    if _rand() > 0.5:
        color_convert(distorted, b=random.uniform(-32, 32))
    hsv = cv2.cvtColor(distorted, cv2.COLOR_BGR2HSV)
    if _rand() > 0.5:
        color_convert(hsv[:, :, 1], a=random.uniform(0.5, 1.5))
    if _rand() > 0.5:
        _shift_hue(hsv)
    distorted = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
    if _rand() > 0.5:
        color_convert(distorted, a=random.uniform(0.5, 1.5))
    return distorted

class Preproc():
    """Callable training-time preprocessor: random crop, resize and flip.

    An instance is created with the square network input size and is then
    called with an image and its annotation array.
    """
    def __init__(self, image_dim):
        # Side length used for both the output width and height.
        self.image_input_size = image_dim

    def __call__(self, image, target):
        """Augment one sample.

        ``target`` holds one annotation per row: columns 0-3 are the box,
        the last column is the label and everything in between is passed on
        as landmarks. The array is deep-copied before it is used.

        Returns the ``(image, targets)`` pair produced by ``_data_aug``.
        """
        assert target.shape[0] > 0, "target without ground truth."
        annotations = copy.deepcopy(target)
        return self._data_aug(image,
                              annotations[:, :4],
                              annotations[:, -1],
                              annotations[:, 4:-1],
                              self.image_input_size)

    def _data_aug(self, image, boxes, labels, landms, image_input_size, max_trial=250):
        """Crop, pad, resize, optionally flip and normalise one sample.

        Returns the image as a float32 array in channel-first layout with
        (104, 117, 123) subtracted per channel, together with the targets
        produced by ``_correct_bbox_by_candidates``.
        """
        # NOTE(review): presumably per-channel BGR means - confirm.
        channel_fill = (104, 117, 123)

        image_h, image_w, _ = image.shape
        input_h = input_w = image_input_size

        flip = _rand() < .5

        candidates = _choose_candidate(max_trial=max_trial, image_w=image_w,
                                       image_h=image_h, boxes=boxes)
        targets, candidate = _correct_bbox_by_candidates(
            candidates=candidates, input_w=input_w, input_h=input_h,
            flip=flip, boxes=boxes, labels=labels, landms=landms,
            allow_outside_center=False)

        # Cut the chosen window out of the source image.
        left, top, crop_w, crop_h = candidate
        patch = image[top:top + crop_h, left:left + crop_w]

        if crop_w != crop_h:
            # Only the un-cropped default window can be non-square; place it in
            # the top-left corner of a square canvas filled with channel_fill.
            assert crop_w == image_w and crop_h == image_h
            side = max(crop_w, crop_h)
            canvas = np.full((side, side, 3), channel_fill, dtype=patch.dtype)
            canvas[:crop_h, :crop_w] = patch
            patch = canvas

        # Selector 10 requests a randomly chosen interpolation code.
        interp = get_interp_method(interp=10)
        resized = cv2.resize(patch, (input_w, input_h),
                             interpolation=cv_image_reshape(interp))

        if flip:
            resized = resized[:, ::-1]

        normalised = resized.astype(np.float32)
        normalised -= channel_fill
        # Move the channel axis to the front (HWC -> CHW).
        return normalised.transpose(2, 0, 1), targets