import cv2
import time
import numpy as np
import torch
# Variable is a no-op wrapper on modern PyTorch; kept for compatibility.
from torch.autograd import Variable
from core.models import PNet, RNet, ONet
import core.utils as utils
import core.image_tools as image_tools
from core.vision import vis_face


def create_mtcnn_net(p_model_path=None, r_model_path=None, o_model_path=None, use_cuda=True):
    """Load PNet/RNet/ONet weights and return the three models in eval mode."""
    pnet, rnet, onet = None, None, None

    if p_model_path is not None:
        pnet = PNet(use_cuda=use_cuda)
        if use_cuda:
            print('p_model_path:{0}'.format(p_model_path))
            pnet.load_state_dict(torch.load(p_model_path))
            pnet.cuda()
        else:
            # force GPU-trained tensors onto the CPU while loading
            pnet.load_state_dict(torch.load(p_model_path, map_location='cpu'))
        pnet.eval()

    if r_model_path is not None:
        rnet = RNet(use_cuda=use_cuda)
        if use_cuda:
            print('r_model_path:{0}'.format(r_model_path))
            rnet.load_state_dict(torch.load(r_model_path))
            rnet.cuda()
        else:
            rnet.load_state_dict(torch.load(r_model_path, map_location='cpu'))
        rnet.eval()

    if o_model_path is not None:
        onet = ONet(use_cuda=use_cuda)
        if use_cuda:
            print('o_model_path:{0}'.format(o_model_path))
            onet.load_state_dict(torch.load(o_model_path))
            onet.cuda()
        else:
            onet.load_state_dict(torch.load(o_model_path, map_location='cpu'))
        onet.eval()

    return pnet, rnet, onet
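
# Example (hypothetical checkpoint path): load only PNet for a quick
# proposal-stage test:
#   pnet, _, _ = create_mtcnn_net(p_model_path='./model/pnet_epoch.pt', use_cuda=False)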


class MtcnnDetector(object):
    """
    P/R/O-Net cascade for face detection and landmark alignment.
    """

    def __init__(self,
                 pnet=None,
                 rnet=None,
                 onet=None,
                 min_face_size=12,
                 stride=2,
                 threshold=[0.6, 0.7, 0.7],
                 scale_factor=0.709,
                 ):

        self.pnet_detector = pnet
        self.rnet_detector = rnet
        self.onet_detector = onet
        self.min_face_size = min_face_size
        self.stride = stride
        self.thresh = threshold
        self.scale_factor = scale_factor

    def unique_image_format(self, im):
        """Convert a PIL image to a numpy array with a suitable dtype."""
        if not isinstance(im, np.ndarray):
            if im.mode == 'I':
                im = np.array(im, np.int32, copy=False)
            elif im.mode == 'I;16':
                im = np.array(im, np.int16, copy=False)
            else:
                im = np.asarray(im)
        return im

    def square_bbox(self, bbox):
        """
        Convert bounding boxes to squares by expanding the shorter side
        around the box center.
        Parameters:
        ----------
        bbox: numpy array, shape n x 5
            input boxes
        Returns:
        -------
        square boxes, same shape as the input
        """
        square_bbox = bbox.copy()

        h = bbox[:, 3] - bbox[:, 1] + 1
        w = bbox[:, 2] - bbox[:, 0] + 1
        l = np.maximum(h, w)
        # shift the top-left corner so the square stays centered:
        # x1 = x1 + w*0.5 - l*0.5, y1 = y1 + h*0.5 - l*0.5
        square_bbox[:, 0] = bbox[:, 0] + w * 0.5 - l * 0.5
        square_bbox[:, 1] = bbox[:, 1] + h * 0.5 - l * 0.5

        # x2 = x1 + l - 1, y2 = y1 + l - 1
        square_bbox[:, 2] = square_bbox[:, 0] + l - 1
        square_bbox[:, 3] = square_bbox[:, 1] + l - 1
        return square_bbox
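
    # For example, a 10x20 box (x1=0, y1=0, x2=9, y2=19) becomes the
    # 20x20 square (x1=-5, y1=0, x2=14, y2=19); coordinates that fall
    # outside the image are handled later by pad().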

    def generate_bounding_box(self, cls_map, reg, scale, threshold):
        """
        Generate candidate boxes from a PNet feature map.
        Parameters:
        ----------
        cls_map: numpy array, n x m x 1
            detection score for each position
        reg: numpy array, 1 x n x m x 4
            bounding box regression offsets
        scale: float number
            scale of this detection
        threshold: float number
            detection threshold
        Returns:
        -------
        bbox array, shape k x 9: [x1, y1, x2, y2, score, dx1, dy1, dx2, dy2]
        """
        stride = 2
        cellsize = 12  # PNet receptive field

        # positions whose classification score exceeds the threshold
        t_index = np.where(cls_map[:, :, 0] > threshold)

        # found nothing
        if t_index[0].size == 0:
            return np.array([])

        # gather the four regression offsets at the kept positions
        dx1, dy1, dx2, dy2 = [reg[0, t_index[0], t_index[1], i] for i in range(4)]
        reg = np.array([dx1, dy1, dx2, dy2])

        # classification scores above the threshold;
        # t_index[0] indexes rows (y), t_index[1] indexes columns (x)
        score = cls_map[t_index[0], t_index[1], 0]
        # map each feature-map cell back to a 12x12 window in the original image
        boundingbox = np.vstack([np.round((stride * t_index[1]) / scale),             # x1
                                 np.round((stride * t_index[0]) / scale),             # y1
                                 np.round((stride * t_index[1] + cellsize) / scale),  # x2
                                 np.round((stride * t_index[0] + cellsize) / scale),  # y2
                                 score,
                                 reg,
                                 ])

        return boundingbox.T
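
    # Inverse-mapping example: at scale 0.5, a hit at feature cell
    # (row y=5, col x=10) maps back to x1=round(2*10/0.5)=40,
    # y1=round(2*5/0.5)=20, x2=round((2*10+12)/0.5)=64 and
    # y2=round((2*5+12)/0.5)=44 in the original image.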

    def resize_image(self, img, scale):
        """
        Resize an image by the given scale factor.
        Parameters:
        ----------
        img: numpy array, height x width x channel
            input image, channels in BGR order here
        scale: float number
            scale factor of the resize operation
        Returns:
        -------
        resized image, numpy array
        """
        height, width, channels = img.shape
        new_height = int(height * scale)
        new_width = int(width * scale)
        new_dim = (new_width, new_height)
        img_resized = cv2.resize(img, new_dim, interpolation=cv2.INTER_LINEAR)
        return img_resized

    def pad(self, bboxes, w, h):
        """
        Clip boxes to the image and compute the matching crop coordinates.
        Parameters:
        ----------
        bboxes: numpy array, n x 5
            input boxes
        w: float number
            width of the input image
        h: float number
            height of the input image
        Returns:
        -------
        dy, dx : numpy array, n x 1
            start point of the bbox in the target (crop) image
        edy, edx : numpy array, n x 1
            end point of the bbox in the target (crop) image
        y, x : numpy array, n x 1
            start point of the bbox in the original image
        ey, ex : numpy array, n x 1
            end point of the bbox in the original image
        tmph, tmpw: numpy array, n x 1
            height and width of the bbox
        """
        # box width and height
        tmpw = (bboxes[:, 2] - bboxes[:, 0] + 1).astype(np.int32)
        tmph = (bboxes[:, 3] - bboxes[:, 1] + 1).astype(np.int32)
        numbox = bboxes.shape[0]

        dx = np.zeros((numbox,))
        dy = np.zeros((numbox,))
        edx, edy = tmpw.copy() - 1, tmph.copy() - 1
        # x, y: start point of the bbox in the original image
        # ex, ey: end point of the bbox in the original image
        # (these are views, so the clipping below also updates bboxes in place)
        x, y, ex, ey = bboxes[:, 0], bboxes[:, 1], bboxes[:, 2], bboxes[:, 3]

        # clip boxes that extend past the right edge
        tmp_index = np.where(ex > w - 1)
        edx[tmp_index] = tmpw[tmp_index] + w - 2 - ex[tmp_index]
        ex[tmp_index] = w - 1

        # clip boxes that extend past the bottom edge
        tmp_index = np.where(ey > h - 1)
        edy[tmp_index] = tmph[tmp_index] + h - 2 - ey[tmp_index]
        ey[tmp_index] = h - 1

        # clip boxes that extend past the left edge
        tmp_index = np.where(x < 0)
        dx[tmp_index] = 0 - x[tmp_index]
        x[tmp_index] = 0

        # clip boxes that extend past the top edge
        tmp_index = np.where(y < 0)
        dy[tmp_index] = 0 - y[tmp_index]
        y[tmp_index] = 0

        return_list = [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph]
        return_list = [item.astype(np.int32) for item in return_list]

        return return_list
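
    # Worked example: for an image of width w=100 and a box spanning
    # x=90..109 (tmpw=20), ex > w-1, so edx = 20 + 100 - 2 - 109 = 9 and
    # ex = 99; the crop then copies im[..., 90:100] into tmp[..., 0:10]
    # and the remaining 10 columns stay zero-padded.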

    def detect_pnet(self, im):
        """Get face candidates through PNet

        Parameters:
        ----------
        im: numpy array
            input image array
            one batch

        Returns:
        -------
        boxes: numpy array
            detected boxes before calibration
        boxes_align: numpy array
            boxes after calibration
        """
        h, w, c = im.shape
        net_size = 12

        current_scale = float(net_size) / self.min_face_size  # initial scale
        im_resized = self.resize_image(im, current_scale)
        current_height, current_width, _ = im_resized.shape

        # run PNet fully-convolutionally over an image pyramid
        all_boxes = list()
        while min(current_height, current_width) > net_size:
            feed_imgs = []
            image_tensor = image_tools.convert_image_to_tensor(im_resized)
            feed_imgs.append(image_tensor)
            feed_imgs = torch.stack(feed_imgs)

            feed_imgs = Variable(feed_imgs)

            if self.pnet_detector.use_cuda:
                feed_imgs = feed_imgs.cuda()

            # PNet has a 12x12 receptive field; each output cell yields a
            # classification score and a bounding-box regression
            cls_map, reg = self.pnet_detector(feed_imgs)

            cls_map_np = image_tools.convert_chwTensor_to_hwcNumpy(cls_map.cpu())
            reg_np = image_tools.convert_chwTensor_to_hwcNumpy(reg.cpu())
            # cls_map_np: (1, n, m, 1), reg_np: (1, n, m, 4)

            # boxes = [x1, y1, x2, y2, score, reg]
            boxes = self.generate_bounding_box(cls_map_np[0, :, :], reg_np, current_scale, self.thresh[0])

            # next level of the image pyramid
            current_scale *= self.scale_factor  # default 0.709
            im_resized = self.resize_image(im, current_scale)
            current_height, current_width, _ = im_resized.shape

            if boxes.size == 0:
                continue

            # per-scale non-maximum suppression
            keep = utils.nms(boxes[:, :5], 0.5, 'Union')
            boxes = boxes[keep]
            all_boxes.append(boxes)

        if len(all_boxes) == 0:
            return None, None
        all_boxes = np.vstack(all_boxes)

        # merge the detections from all scales
        keep = utils.nms(all_boxes[:, 0:5], 0.7, 'Union')
        all_boxes = all_boxes[keep]

        bw = all_boxes[:, 2] - all_boxes[:, 0] + 1
        bh = all_boxes[:, 3] - all_boxes[:, 1] + 1

        # uncalibrated boxes with the PNet score
        boxes = np.vstack([all_boxes[:, 0],
                           all_boxes[:, 1],
                           all_boxes[:, 2],
                           all_boxes[:, 3],
                           all_boxes[:, 4],
                           ])

        boxes = boxes.T

        # apply the regression offsets [px1, py1, px2, py2], which are
        # predicted as fractions of the box width/height
        align_topx = all_boxes[:, 0] + all_boxes[:, 5] * bw
        align_topy = all_boxes[:, 1] + all_boxes[:, 6] * bh
        align_bottomx = all_boxes[:, 2] + all_boxes[:, 7] * bw
        align_bottomy = all_boxes[:, 3] + all_boxes[:, 8] * bh

        # refined boxes
        boxes_align = np.vstack([align_topx,
                                 align_topy,
                                 align_bottomx,
                                 align_bottomy,
                                 all_boxes[:, 4],
                                 ])
        boxes_align = boxes_align.T

        # remove degenerate or out-of-image boxes
        valindex = [True for _ in range(boxes_align.shape[0])]
        for i in range(boxes_align.shape[0]):
            if boxes_align[i][2] - boxes_align[i][0] <= 3 or boxes_align[i][3] - boxes_align[i][1] <= 3:
                valindex[i] = False
                print('pnet has one smaller than 3')
            elif boxes_align[i][2] < 1 or boxes_align[i][0] > w - 2 \
                    or boxes_align[i][3] < 1 or boxes_align[i][1] > h - 2:
                valindex[i] = False
                print('pnet has one out')
        boxes_align = boxes_align[valindex, :]
        boxes = boxes[valindex, :]
        return boxes, boxes_align
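
    # Calibration example: a 100-px-wide candidate at x1=50 with a
    # predicted offset dx1=0.1 is refined to x1' = 50 + 0.1*100 = 60.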

    def detect_rnet(self, im, dets):
        """Get face candidates using RNet

        Parameters:
        ----------
        im: numpy array
            input image array
        dets: numpy array
            detection results of PNet

        Returns:
        -------
        boxes: numpy array
            detected boxes before calibration
        boxes_align: numpy array
            boxes after calibration
        """
        h, w, c = im.shape

        if dets is None:
            return None, None
        if dets.shape[0] == 0:
            return None, None

        # keep the raw detections around for debugging the crop step
        detss = dets
        # expand PNet boxes to squares before cropping
        dets = self.square_bbox(dets)
        dets[:, 0:4] = np.round(dets[:, 0:4])
        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
        num_boxes = dets.shape[0]

        cropped_ims_tensors = []
        for i in range(num_boxes):
            try:
                # paste the clipped crop into a zero-padded square patch
                tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
                tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
            except Exception:
                print(dy[i], edy[i], dx[i], edx[i], y[i], ey[i], x[i], ex[i], tmpw[i], tmph[i])
                print(dets[i])
                print(detss[i])
                print(h, w)
                exit()
            # RNet takes 24x24 inputs
            crop_im = cv2.resize(tmp, (24, 24))
            crop_im_tensor = image_tools.convert_image_to_tensor(crop_im)
            cropped_ims_tensors.append(crop_im_tensor)
        feed_imgs = Variable(torch.stack(cropped_ims_tensors))

        if self.rnet_detector.use_cuda:
            feed_imgs = feed_imgs.cuda()

        cls_map, reg = self.rnet_detector(feed_imgs)

        cls_map = cls_map.cpu().data.numpy()
        reg = reg.cpu().data.numpy()

        keep_inds = np.where(cls_map > self.thresh[1])[0]

        if len(keep_inds) > 0:
            boxes = dets[keep_inds]
            cls = cls_map[keep_inds]
            reg = reg[keep_inds]
        else:
            return None, None

        keep = utils.nms(boxes, 0.7)

        if len(keep) == 0:
            return None, None

        keep_cls = cls[keep]
        keep_boxes = boxes[keep]
        keep_reg = reg[keep]

        bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1
        bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1

        # uncalibrated boxes with the RNet score
        boxes = np.vstack([keep_boxes[:, 0],
                           keep_boxes[:, 1],
                           keep_boxes[:, 2],
                           keep_boxes[:, 3],
                           keep_cls[:, 0],
                           ])

        # apply the RNet regression offsets
        align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
        align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
        align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
        align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh

        boxes_align = np.vstack([align_topx,
                                 align_topy,
                                 align_bottomx,
                                 align_bottomy,
                                 keep_cls[:, 0],
                                 ])

        boxes = boxes.T
        boxes_align = boxes_align.T

        # remove degenerate or out-of-image boxes
        valindex = [True for _ in range(boxes_align.shape[0])]
        for i in range(boxes_align.shape[0]):
            if boxes_align[i][2] - boxes_align[i][0] <= 3 or boxes_align[i][3] - boxes_align[i][1] <= 3:
                valindex[i] = False
                print('rnet has one smaller than 3')
            elif boxes_align[i][2] < 1 or boxes_align[i][0] > w - 2 \
                    or boxes_align[i][3] < 1 or boxes_align[i][1] > h - 2:
                valindex[i] = False
                print('rnet has one out')
        boxes_align = boxes_align[valindex, :]
        boxes = boxes[valindex, :]
        return boxes, boxes_align
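
    # Note: the RNet stage above relies on utils.nms's default overlap
    # mode (presumably 'Union', matching the explicit PNet calls), while
    # ONet below passes mode="Minimum", which normalizes the overlap by
    # the smaller box's area and so suppresses nested boxes more
    # aggressively.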

    def detect_onet(self, im, dets):
        """Get face candidates using ONet

        Parameters:
        ----------
        im: numpy array
            input image array
        dets: numpy array
            detection results of RNet

        Returns:
        -------
        boxes_align: numpy array
            boxes after calibration
        landmarks_align: numpy array
            landmarks after calibration
        """
        h, w, c = im.shape

        if dets is None:
            return None, None
        if dets.shape[0] == 0:
            return None, None

        detss = dets
        dets = self.square_bbox(dets)
        dets[:, 0:4] = np.round(dets[:, 0:4])

        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
        num_boxes = dets.shape[0]

        cropped_ims_tensors = []
        for i in range(num_boxes):
            try:
                # paste the clipped crop into a zero-padded square patch
                tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
                tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
            except Exception:
                print(dy[i], edy[i], dx[i], edx[i], y[i], ey[i], x[i], ex[i], tmpw[i], tmph[i])
                print(dets[i])
                print(detss[i])
                print(h, w)
            # ONet takes 48x48 inputs
            crop_im = cv2.resize(tmp, (48, 48))
            crop_im_tensor = image_tools.convert_image_to_tensor(crop_im)
            cropped_ims_tensors.append(crop_im_tensor)
        feed_imgs = Variable(torch.stack(cropped_ims_tensors))

        if self.onet_detector.use_cuda:
            feed_imgs = feed_imgs.cuda()

        cls_map, reg, landmark = self.onet_detector(feed_imgs)

        cls_map = cls_map.cpu().data.numpy()
        reg = reg.cpu().data.numpy()
        landmark = landmark.cpu().data.numpy()

        keep_inds = np.where(cls_map > self.thresh[2])[0]

        if len(keep_inds) > 0:
            boxes = dets[keep_inds]
            cls = cls_map[keep_inds]
            reg = reg[keep_inds]
            landmark = landmark[keep_inds]
        else:
            return None, None

        keep = utils.nms(boxes, 0.7, mode="Minimum")

        if len(keep) == 0:
            return None, None

        keep_cls = cls[keep]
        keep_boxes = boxes[keep]
        keep_reg = reg[keep]
        keep_landmark = landmark[keep]

        bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1
        bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1

        # apply the ONet regression offsets
        align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
        align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
        align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
        align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh

        # landmark offsets are relative to the uncalibrated box's top-left corner
        align_landmark_topx = keep_boxes[:, 0]
        align_landmark_topy = keep_boxes[:, 1]

        boxes_align = np.vstack([align_topx,
                                 align_topy,
                                 align_bottomx,
                                 align_bottomy,
                                 keep_cls[:, 0],
                                 ])

        boxes_align = boxes_align.T

        # decode the five landmark points (left eye, right eye, nose,
        # left mouth corner, right mouth corner)
        landmark = np.vstack([
            align_landmark_topx + keep_landmark[:, 0] * bw,
            align_landmark_topy + keep_landmark[:, 1] * bh,
            align_landmark_topx + keep_landmark[:, 2] * bw,
            align_landmark_topy + keep_landmark[:, 3] * bh,
            align_landmark_topx + keep_landmark[:, 4] * bw,
            align_landmark_topy + keep_landmark[:, 5] * bh,
            align_landmark_topx + keep_landmark[:, 6] * bw,
            align_landmark_topy + keep_landmark[:, 7] * bh,
            align_landmark_topx + keep_landmark[:, 8] * bw,
            align_landmark_topy + keep_landmark[:, 9] * bh,
        ])

        landmark_align = landmark.T

        return boxes_align, landmark_align
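
    # Landmark decoding example: for a box with top-left (10, 20),
    # bw=50, bh=60, a predicted offset pair (0.3, 0.4) places the point
    # at (10 + 0.3*50, 20 + 0.4*60) = (25, 44).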

    def detect_face(self, img):
        """Run the full P/R/O-Net cascade over an image."""
        # keep an RGB copy for visualization
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        boxes_align = np.array([])
        landmark_align = np.array([])

        t = time.time()
        t1 = t2 = t3 = 0.0  # stage timings; stay zero if a stage is skipped

        # pnet
        if self.pnet_detector:
            boxes, boxes_align = self.detect_pnet(img)
            if boxes_align is None:
                return np.array([]), np.array([])

            t1 = time.time() - t
            t = time.time()

            vis_face(img_rgb, boxes_align, None, './vis/pnet_output.jpg')

        # rnet
        if self.rnet_detector:
            boxes, boxes_align = self.detect_rnet(img, boxes_align)
            if boxes_align is None:
                return np.array([]), np.array([])

            t2 = time.time() - t
            t = time.time()

            vis_face(img_rgb, boxes_align, None, './vis/rnet_output.jpg')

        # onet
        if self.onet_detector:
            boxes_align, landmark_align = self.detect_onet(img, boxes_align)
            if boxes_align is None:
                return np.array([]), np.array([])

            t3 = time.time() - t

        print('time cost {:.3f} pnet {:.3f} rnet {:.3f} onet {:.3f}'.format(t1 + t2 + t3, t1, t2, t3))

        return boxes_align, landmark_align
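

# A minimal end-to-end sketch (hypothetical checkpoint paths and test
# image; adjust to your setup, and make sure ./vis exists for the
# intermediate visualizations):
if __name__ == '__main__':
    pnet, rnet, onet = create_mtcnn_net(p_model_path='./model/pnet_epoch.pt',
                                        r_model_path='./model/rnet_epoch.pt',
                                        o_model_path='./model/onet_epoch.pt',
                                        use_cuda=False)
    detector = MtcnnDetector(pnet=pnet, rnet=rnet, onet=onet, min_face_size=24)

    image = cv2.imread('./test.jpg')  # BGR, as detect_face expects
    bboxes, landmarks = detector.detect_face(image)
    print('detected {} face(s)'.format(bboxes.shape[0]))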