#!/usr/bin/python3
# Repository: lv-zhixuan/yolo5_tf2

from os import mkdir, listdir;
from os.path import join, exists;
from shutil import rmtree;
from math import ceil;
from multiprocessing import Process;
from pycocotools.coco import COCO;
import numpy as np;
import cv2;
import tensorflow as tf;

# number of worker processes (and therefore TFRecord shards) created per dataset split
PROCESS_NUM = 80

# Lookup table indexed by the raw COCO category_id (0..90).
# A value of -1 marks an id that has no category, 0 is the background slot,
# and 1..80 are the contiguous category numbers.
label_map = [
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,                                            # ids 0-11
  -1,                                                                              # id 12
  12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,                              # ids 13-25
  -1,                                                                              # id 26
  25, 26,                                                                          # ids 27-28
  -1, -1,                                                                          # ids 29-30
  27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,                          # ids 31-44
  -1,                                                                              # id 45
  41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,  # ids 46-65
  -1,                                                                              # id 66
  61,                                                                              # id 67
  -1, -1,                                                                          # ids 68-69
  62,                                                                              # id 70
  -1,                                                                              # id 71
  63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73,                                      # ids 72-82
  -1,                                                                              # id 83
  74, 75, 76, 77, 78, 79, 80,                                                      # ids 84-90
]

def parse_function_generator(num_classes, img_shape = (608,608), random = True, jitter = .3):
  """Build a map function that turns serialized samples into (image, YOLO ground truth) pairs.

  Args:
    num_classes: number of object classes; sets the depth of the one-hot class vector.
    img_shape: network input size, indexed by the code below as (width, height).
    random: when equal to True the sample is augmented (aspect-ratio jitter, rescale,
      random pad/crop, horizontal flip, colour distortion); otherwise the image is
      resized with preserved aspect ratio and padded to img_shape.
    jitter: the aspect-ratio jitter factors are drawn uniformly from [1 - jitter, 1 + jitter].

  Returns:
    A function parse_function(serialized_example) suitable for tf.data.Dataset.map.
  """
  def parse_function(serialized_example):
    """Decode one serialized sample and generate the three-level label tensors.

    Returns:
      image, (level1_gt, level2_gt, level3_gt) where image has been divided by 255 and
      level-k ground truth has shape (h // s, w // s, 3, 5 + num_classes) for s = 32, 16, 8.
      Every labelled cell holds (center x, center y, w, h, 1, one-hot class).
    """
    feature = tf.io.parse_single_example(
      serialized_example,
      features = {
        'image': tf.io.FixedLenFeature((), dtype = tf.string),
        'bbox': tf.io.VarLenFeature(dtype = tf.float32),
        'label': tf.io.VarLenFeature(dtype = tf.int64),
        'obj_num': tf.io.FixedLenFeature((), dtype = tf.int64)
      });
    # every tensor op below is wrapped in a Keras Lambda layer
    obj_num = tf.keras.layers.Lambda(lambda x: tf.cast(x, dtype = tf.int32))(feature['obj_num']);
    image = tf.keras.layers.Lambda(lambda x: tf.io.decode_jpeg(x))(feature['image']);
    bbox = tf.keras.layers.Lambda(lambda x: tf.sparse.to_dense(x, default_value = 0))(feature['bbox']);
    # boxes are stored flattened; restore (obj_num, 4) in sequence of relative (upper left y, x, bottom right y, x)
    bbox = tf.keras.layers.Lambda(lambda x: tf.reshape(x[0], (x[1], 4)))([bbox, obj_num]);
    labels = tf.keras.layers.Lambda(lambda x: tf.sparse.to_dense(x, default_value = 0))(feature['label']);
    labels = tf.keras.layers.Lambda(lambda x: tf.reshape(x[0], (x[1],)))([labels, obj_num]);
    # add batch dimension
    image = tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis = 0))(image);
    bbox = tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis = 0))(bbox);
    # augmentation
    if random == True:
      aspect_ratio_jitter = tf.keras.layers.Lambda(lambda x, j: tf.random.uniform(shape = (2,), minval = 1 - j, maxval = 1 + j, dtype = tf.float32), arguments = {'j': jitter})(image); # aspect_ratio_jitter.shape = (2)
      resize_input_shape = tf.keras.layers.Lambda(lambda x, h, w: tf.cast([h, w], dtype = tf.float32) * x, arguments = {'h': img_shape[1], 'w': img_shape[0]})(aspect_ratio_jitter); # resize_input_shape.shape = (2) in sequence of (h, w)
      scale = tf.keras.layers.Lambda(lambda x: tf.random.uniform(shape = (1,), minval = .8, maxval = 1.2, dtype = tf.float32))(image); # scale.shape = (1)
      resize_shape = tf.keras.layers.Lambda(lambda x: tf.cast(tf.cond(tf.greater(x[1][0], x[1][1]), true_fn = lambda: x[2] * x[1] / x[0][0], false_fn = lambda: x[2] * x[1] / x[0][1]), dtype = tf.int32))([aspect_ratio_jitter, resize_input_shape, scale]); # resize_shape.shape = (2) in sequence of (h, w)
      resize_image = tf.keras.layers.Lambda(lambda x: tf.image.resize(x[0], x[1], method = tf.image.ResizeMethod.BICUBIC))([image, resize_shape]);
      # each of the four steps below converts the relative boxes to pixels, shifts them by the
      # pad / crop offset and normalizes them again by the new image size
      # 1) try to pad along height direction
      pad = tf.keras.layers.Lambda(lambda x, h: tf.math.maximum(h - x[0], 0), arguments = {'h': img_shape[1]})(resize_shape);
      pad_top = tf.keras.layers.Lambda(lambda x: tf.random.uniform(maxval = x + 1, shape = (), dtype = tf.int32))(pad);
      pad_bottom = tf.keras.layers.Lambda(lambda x: x[0] - x[1])([pad, pad_top]);
      hpad_image = tf.keras.layers.Lambda(lambda x: tf.pad(x[0], [[0,0],[x[1],x[2]],[0,0],[0,0]], constant_values = 128))([resize_image, pad_top, pad_bottom]);
      hpad_bbox = tf.keras.layers.Lambda(lambda x: x[0] * tf.cast([[x[1][0], x[1][1], x[1][0], x[1][1]]], dtype = tf.float32))([bbox, resize_shape]);
      hpad_bbox = tf.keras.layers.Lambda(lambda x: x[0] + tf.cast([[x[1], 0, x[1], 0]], dtype = tf.float32))([hpad_bbox, pad_top]);
      hpad_bbox = tf.keras.layers.Lambda(lambda x: x[0] / tf.cast([[x[1][0] + x[2], x[1][1], x[1][0] + x[2], x[1][1]]], dtype = tf.float32))([hpad_bbox, resize_shape, pad]);
      resize_shape = tf.keras.layers.Lambda(lambda x: x[0] + tf.cast([x[1], 0], dtype = tf.int32))([resize_shape, pad]); # resize_shape = (h + pad, w)
      # 2) try to calculate crop along height direction
      crop = tf.keras.layers.Lambda(lambda x, h: tf.math.maximum(x[0] - h, 0), arguments = {'h': img_shape[1]})(resize_shape);
      crop_top = tf.keras.layers.Lambda(lambda x: tf.random.uniform(maxval = x + 1, shape = (), dtype = tf.int32))(crop);
      hcrop_image = tf.keras.layers.Lambda(lambda x, h: tf.image.crop_to_bounding_box(x[0], x[1], 0, h, tf.shape(x[0])[2]), arguments = {'h': img_shape[1]})([hpad_image, crop_top]);
      hcrop_bbox = tf.keras.layers.Lambda(lambda x: x[0] * tf.cast([[x[1][0], x[1][1], x[1][0], x[1][1]]], dtype = tf.float32))([hpad_bbox, resize_shape]);
      hcrop_bbox = tf.keras.layers.Lambda(lambda x: x[0] - tf.cast([[x[1], 0, x[1], 0]], dtype = tf.float32))([hcrop_bbox, crop_top]);
      hcrop_bbox = tf.keras.layers.Lambda(lambda x: x[0] / tf.cast([[x[1][0] - x[2], x[1][1], x[1][0] - x[2], x[1][1]]], dtype = tf.float32))([hcrop_bbox, resize_shape, crop]);
      resize_shape = tf.keras.layers.Lambda(lambda x: x[0] - tf.cast([x[1], 0], dtype = tf.int32))([resize_shape, crop]); # resize_shape = (h - crop, w)
      # 3) try to pad along width direction
      pad = tf.keras.layers.Lambda(lambda x, w: tf.math.maximum(w - x[1], 0), arguments = {'w': img_shape[0]})(resize_shape);
      pad_left = tf.keras.layers.Lambda(lambda x: tf.random.uniform(maxval = x + 1, shape = (), dtype = tf.int32))(pad);
      pad_right = tf.keras.layers.Lambda(lambda x: x[0] - x[1])([pad, pad_left]);
      wpad_image = tf.keras.layers.Lambda(lambda x: tf.pad(x[0], [[0,0],[0,0],[x[1],x[2]],[0,0]], constant_values = 128))([hcrop_image, pad_left, pad_right]);
      wpad_bbox = tf.keras.layers.Lambda(lambda x: x[0] * tf.cast([[x[1][0], x[1][1], x[1][0], x[1][1]]], dtype = tf.float32))([hcrop_bbox, resize_shape]);
      wpad_bbox = tf.keras.layers.Lambda(lambda x: x[0] + tf.cast([[0, x[1], 0, x[1]]], dtype = tf.float32))([wpad_bbox, pad_left]);
      wpad_bbox = tf.keras.layers.Lambda(lambda x: x[0] / tf.cast([[x[1][0], x[1][1] + x[2], x[1][0], x[1][1] + x[2]]], dtype = tf.float32))([wpad_bbox, resize_shape, pad]);
      resize_shape = tf.keras.layers.Lambda(lambda x: x[0] + tf.cast([0, x[1]], dtype = tf.int32))([resize_shape, pad]); # resize_shape = (h, w + pad)
      # 4) try to calculate crop along width direction
      crop = tf.keras.layers.Lambda(lambda x, w: tf.math.maximum(x[1] - w, 0), arguments = {'w': img_shape[0]})(resize_shape);
      crop_left = tf.keras.layers.Lambda(lambda x: tf.random.uniform(maxval = x + 1, shape = (), dtype = tf.int32))(crop);
      wcrop_image = tf.keras.layers.Lambda(lambda x, w: tf.image.crop_to_bounding_box(x[0], 0, x[1], tf.shape(x[0])[1], w), arguments = {'w': img_shape[0]})([wpad_image, crop_left]);
      wcrop_bbox = tf.keras.layers.Lambda(lambda x: x[0] * tf.cast([[x[1][0], x[1][1], x[1][0], x[1][1]]], dtype = tf.float32))([wpad_bbox, resize_shape]);
      wcrop_bbox = tf.keras.layers.Lambda(lambda x: x[0] - tf.cast([[0, x[1], 0, x[1]]], dtype = tf.float32))([wcrop_bbox, crop_left]);
      wcrop_bbox = tf.keras.layers.Lambda(lambda x: x[0] / tf.cast([[x[1][0], x[1][1] - x[2], x[1][0], x[1][1] - x[2]]], dtype = tf.float32))([wcrop_bbox, resize_shape, crop]);
      resize_shape = tf.keras.layers.Lambda(lambda x: x[0] - tf.cast([0, x[1]], dtype = tf.int32))([resize_shape, crop]); # resize_shape = (h, w - crop)
      # 5) random flip image
      # the decision is drawn with a TensorFlow op so that it is re-sampled for every example;
      # a Python-side random number would be evaluated only once, when tf.data traces this function
      flip = tf.keras.layers.Lambda(lambda x: tf.math.less(tf.random.uniform(shape = (), minval = 0., maxval = 1., dtype = tf.float32), 0.5))(image);
      flip_image = tf.keras.layers.Lambda(lambda x: tf.cond(x[1], true_fn = lambda: tf.image.flip_left_right(x[0]), false_fn = lambda: x[0]))([wcrop_image, flip]);
      # mirrored x becomes 1 - x; the two x coordinates end up swapped, which is harmless because
      # only the box center and the absolute width / height are used below
      final_bbox = tf.keras.layers.Lambda(lambda x: tf.cond(x[1], true_fn = lambda: x[0] * tf.cast([1,-1,1,-1], dtype = tf.float32) + tf.cast([0,1,0,1], dtype = tf.float32), false_fn = lambda: x[0]))([wcrop_bbox, flip]);
      # 6) distort image in HSV color space
      # NOTE(review): these ops run before the division by 255 below; confirm the deltas
      # (e.g. 10 / 255 for brightness) are expressed on the intended value scale
      color_distort_image = tf.keras.layers.Lambda(lambda x: tf.image.random_hue(x, 10 / 180))(flip_image);
      color_distort_image = tf.keras.layers.Lambda(lambda x: tf.image.random_saturation(x, 0, 10))(color_distort_image);
      final_image = tf.keras.layers.Lambda(lambda x: tf.image.random_brightness(x, 10 / 255))(color_distort_image);
    else:
      resize_image = tf.keras.layers.Lambda(lambda x, h, w: tf.image.resize(x, (h, w), method = tf.image.ResizeMethod.BICUBIC, preserve_aspect_ratio = True), arguments = {'h': img_shape[1], 'w': img_shape[0]})(image); # resize_image.shape = (batch, nh, nw, 3)
      # center the resized image by splitting the padding between both sides
      pad_image = tf.keras.layers.Lambda(lambda x, h, w: tf.pad(x, [[0,0],
                                                                    [(h - tf.shape(x)[1])//2, (h - tf.shape(x)[1]) - (h - tf.shape(x)[1])//2],
                                                                    [(w - tf.shape(x)[2])//2, (w - tf.shape(x)[2]) - (w - tf.shape(x)[2])//2],
                                                                    [0,0]], constant_values = 128), 
                                        arguments = {'h': img_shape[1], 'w': img_shape[0]})(resize_image); # resize_image.shape = (batch, 608, 608, 3)
      final_image = tf.keras.layers.Lambda(lambda x: tf.cast(x, tf.float32))(pad_image); # image_data.shape = (batch, 608, 608, 3)
      resize_bbox = tf.keras.layers.Lambda(lambda x: x[0] * tf.cast([[[tf.shape(x[1])[1], 
                                                                      tf.shape(x[1])[2], 
                                                                      tf.shape(x[1])[1], 
                                                                      tf.shape(x[1])[2]]]], dtype = tf.float32))([bbox, resize_image]); # resize_bbox.shape = (batch, obj_num, 4)
      pad_bbox = tf.keras.layers.Lambda(lambda x, h, w: x[0] + tf.cast([[[(h - tf.shape(x[1])[1])//2,
                                                                          (w - tf.shape(x[1])[2])//2,
                                                                          (h - tf.shape(x[1])[1])//2,
                                                                          (w - tf.shape(x[1])[2])//2]]], dtype = tf.float32), 
                                        arguments = {'h': img_shape[1], 'w': img_shape[0]})([resize_bbox, resize_image]);
      final_bbox = tf.keras.layers.Lambda(lambda x, h, w: x / tf.cast([[[h, w, h, w]]], dtype = tf.float32),
                                          arguments = {'h': img_shape[1], 'w': img_shape[0]})(pad_bbox);
    final_image = tf.keras.layers.Lambda(lambda x: x / 255.)(final_image);
    image = tf.keras.layers.Lambda(lambda x: tf.squeeze(x, axis = 0))(final_image); # image.shape = (height, width, 3)
    bbox = tf.keras.layers.Lambda(lambda x: tf.squeeze(x, axis = 0))(final_bbox); # bbox.shape = (obj_num, 4)
    # generate label tensors
    anchors = [[[116,90], [156,198], [373,326]], [[30,61], [62,45], [59,119]], [[10,13], [16,30], [33,23]]]; # anchors.shape = (level num = 3, anchor num = 3, 2)
    # 1) choose the best anchor box with the maximum of IOU with target bounding
    relative_bbox_center = tf.keras.layers.Lambda(lambda x: tf.reverse((x[..., 0:2] + x[..., 2:4]) / 2, axis = [-1]))(bbox); # relative_bbox_center.shape = (obj_num, 2) in sequence of (center x, center y)
    relative_bbox_wh = tf.keras.layers.Lambda(lambda x: tf.reverse(tf.math.abs(x[..., 2:4] - x[..., 0:2]), axis = [-1]))(bbox); # relative_bbox_wh.shape = (obj_num, 2) in sequence of (w, h)
    relative_bbox = tf.keras.layers.Concatenate(axis = -1)([relative_bbox_center, relative_bbox_wh]); # relative_bbox.shape = (obj_num, 4) in sequence of (center x, center y, w, h)
    # drop degenerate boxes whose width or height is not positive
    valid_bbox = tf.keras.layers.Lambda(lambda x: tf.boolean_mask(x, tf.math.logical_and(tf.math.greater(x[...,2], 0), tf.math.greater(x[...,3], 0))))(relative_bbox); # valid_bbox.shape = (valid_num, 4) in sequence of (center x, center y, w, h)
    valid_labels = tf.keras.layers.Lambda(lambda x: tf.boolean_mask(x[0], tf.math.logical_and(tf.math.greater(x[1][...,2], 0), tf.math.greater(x[1][...,3], 0))))([labels, relative_bbox]); # valid_labels.shape = (valid_num)
    # boxes and anchors are both centered at the origin, so only their sizes (in pixels) are compared
    bbox_maxes = tf.keras.layers.Lambda(lambda x, s: x[...,2:4] * tf.expand_dims(tf.cast(s, dtype = tf.float32), axis = 0) / 2, arguments = {'s': img_shape})(valid_bbox); # bbox_maxes.shape = (valid_num, 2)
    bbox_mins = tf.keras.layers.Lambda(lambda x: -x)(bbox_maxes); # bbox_mins.shape = (valid_num, 2)
    bbox_wh = tf.keras.layers.Lambda(lambda x: x[0] - x[1])([bbox_maxes, bbox_mins]); # bbox_wh.shape = (valid num, 2)
    bbox_area = tf.keras.layers.Lambda(lambda x: tf.reshape(x[...,0] * x[...,1], (1, 1, -1)))(bbox_wh); # bbox_area.shape = (1, 1, valid_num)
    intersect_maxes = tf.keras.layers.Lambda(lambda x, a: tf.math.minimum(tf.expand_dims(tf.cast(a, dtype = tf.float32)/2, axis = -2), tf.expand_dims(tf.expand_dims(x, axis = 0), axis = 0)), arguments = {'a': anchors})(bbox_maxes); # intersect_maxes.shape = (level num, anchor num, valid num, 2)
    intersect_mins = tf.keras.layers.Lambda(lambda x, a: tf.math.maximum(tf.expand_dims(-tf.cast(a, dtype = tf.float32)/2, axis = -2), tf.expand_dims(tf.expand_dims(x, axis = 0), axis = 0)), arguments = {'a': anchors})(bbox_mins); # intersect_mins.shape = (level num, anchor num, valid num, 2)
    intersect_wh = tf.keras.layers.Lambda(lambda x: tf.math.maximum(x[0] - x[1], 0.))([intersect_maxes, intersect_mins]); # intersect_wh.shape = (level num, anchor num, valid num, 2)
    intersect_area = tf.keras.layers.Lambda(lambda x: x[...,0] * x[...,1])(intersect_wh); # intersect_area.shape = (level num, anchor num, valid num)
    # the denominator is (box area + anchor area) without subtracting the intersection, i.e. I / (A + B);
    # the true IOU I / (A + B - I) grows monotonically with this ratio, so the argmax below is the same
    iou = tf.keras.layers.Lambda(lambda x, a: x[0] / (x[1] + tf.expand_dims(tf.cast(a, dtype = tf.float32)[...,0] * tf.cast(a, dtype = tf.float32)[...,1], axis = -1)), arguments = {'a': anchors})([intersect_area, bbox_area]); # iou.shape = (level num, anchor num, valid num)
    # flatten (level, anchor) into one axis of 9 candidates; index // 3 is the level, index % 3 the anchor
    best_idx = tf.keras.layers.Lambda(lambda x: tf.math.argmax(tf.reshape(x, (-1, tf.shape(x)[-1])), axis = 0))(iou); # best_idx.shape = (valid num)
    best_levels = tf.keras.layers.Lambda(lambda x: x // 3)(best_idx); # best_levels.shape = (valid_num)
    best_anchors = tf.keras.layers.Lambda(lambda x: x % 3)(best_idx); # best_anchors.shape = (valid_num)
    # 2) generate labels
    # NOTE(review): the range checks below hard-code 79 instead of num_classes - 1, and their
    # results are not referenced by any output; confirm they are actually executed
    # NOTE(review): tf.scatter_nd adds up updates that share an index, so two objects assigned to
    # the same cell and anchor would be summed; confirm this case is acceptable
    level1_mask = tf.keras.layers.Lambda(lambda x: tf.math.equal(x, 0))(best_levels); # level1_mask.shape = (valid_num)
    level1_anchors = tf.keras.layers.Lambda(lambda x: tf.boolean_mask(x[0], x[1]))([best_anchors, level1_mask]); # level1_anchors.shape = (level1 num)
    level1_bbox = tf.keras.layers.Lambda(lambda x: tf.boolean_mask(x[0], x[1]))([valid_bbox, level1_mask]); # level1_bbox.shape = (level1 num, 4) in sequence of (center x, center y, w, h)
    level1_labels = tf.keras.layers.Lambda(lambda x: tf.boolean_mask(x[0], x[1]))([valid_labels, level1_mask]); # level1_labels.shape = (level1 num)
    level1_coords = tf.keras.layers.Lambda(lambda x, h, w: tf.clip_by_value(
      tf.cast(
        tf.concat([
          tf.reverse(x[0][..., 0:2], axis = [-1]) * tf.constant([[h // 32, w // 32]], dtype = tf.float32), # shape = (level1 num, 2)
          tf.expand_dims(tf.cast(x[1], dtype = tf.float32), axis = -1) # shape = (level1 num, 1)
        ], axis = -1), dtype = tf.int32), 
      clip_value_min = 0, clip_value_max = [[h//32-1, w//32-1, 2]]), 
      arguments = {'h': img_shape[1], 'w': img_shape[0]})([level1_bbox, level1_anchors]); # level1_coords.shape = (level1_num, 3) in sequence of (h, w, anchor)
    assert_level1 = tf.debugging.Assert(tf.math.reduce_all(tf.math.logical_and(tf.math.greater_equal(level1_labels, 0), tf.math.less_equal(level1_labels, 79))), [level1_labels]);
    level1_outputs = tf.keras.layers.Lambda(lambda x, c: tf.concat([x[0], tf.ones((tf.shape(x[0])[0], 1), dtype = tf.float32), tf.one_hot(tf.cast(x[1], dtype = tf.int32), c)], axis = -1), arguments = {'c': num_classes})([level1_bbox, level1_labels]); # level1_outputs.shape = (level1_num, 5 + c)
    level1_gt = tf.keras.layers.Lambda(lambda x, h, w, c: tf.scatter_nd(updates = x[0], indices = x[1], shape = (h // 32, w // 32, 3, 5 + c)), arguments = {'h': img_shape[1], 'w': img_shape[0], 'c': num_classes})([level1_outputs, level1_coords]); # level1_gt.shape = (h//32, w//32, 3, 5+c)
    level2_mask = tf.keras.layers.Lambda(lambda x: tf.math.equal(x, 1))(best_levels); # level2_mask.shape = (valid_num)
    level2_anchors = tf.keras.layers.Lambda(lambda x: tf.boolean_mask(x[0], x[1]))([best_anchors, level2_mask]); # level2_anchors.shape = (level2 num)
    level2_bbox = tf.keras.layers.Lambda(lambda x: tf.boolean_mask(x[0], x[1]))([valid_bbox, level2_mask]); # level2_bbox.shape = (level2 num, 4)
    level2_labels = tf.keras.layers.Lambda(lambda x: tf.boolean_mask(x[0], x[1]))([valid_labels, level2_mask]); # level2_labels.shape = (level2 num)
    level2_coords = tf.keras.layers.Lambda(lambda x, h, w: tf.clip_by_value(
      tf.cast(
        tf.concat([
          tf.reverse(x[0][..., 0:2], axis = [-1]) * tf.constant([[h // 16, w // 16]], dtype = tf.float32), # shape = (level2 num, 2)
          tf.expand_dims(tf.cast(x[1], dtype = tf.float32), axis = -1) # shape = (level2 num, 1)
        ], axis = -1), dtype = tf.int32), 
      clip_value_min = 0, clip_value_max = [[h//16-1, w//16-1, 2]]), 
      arguments = {'h': img_shape[1], 'w': img_shape[0]})([level2_bbox, level2_anchors]); # level2_outputs.shape = (level2_num, 3) in sequence of (h, w, anchor)
    assert_level2 = tf.debugging.Assert(tf.math.reduce_all(tf.math.logical_and(tf.math.greater_equal(level2_labels, 0), tf.math.less_equal(level2_labels, 79))), [level2_labels]);
    level2_outputs = tf.keras.layers.Lambda(lambda x, c: tf.concat([x[0], tf.ones((tf.shape(x[0])[0], 1), dtype = tf.float32), tf.one_hot(tf.cast(x[1], dtype = tf.int32), c)], axis = -1), arguments = {'c': num_classes})([level2_bbox, level2_labels]); # level2_outputs.shape = (level2_num, 5 + c)
    level2_gt = tf.keras.layers.Lambda(lambda x, h, w, c: tf.scatter_nd(updates = x[0], indices = x[1], shape = (h // 16, w // 16, 3, 5 + c)), arguments = {'h': img_shape[1], 'w': img_shape[0], 'c': num_classes})([level2_outputs, level2_coords]); # level2_gt.shape = (h//16, w//16, 3, 5+c)
    level3_mask = tf.keras.layers.Lambda(lambda x: tf.math.equal(x, 2))(best_levels); # level3_mask.shape = (valid_num)
    level3_anchors = tf.keras.layers.Lambda(lambda x: tf.boolean_mask(x[0], x[1]))([best_anchors, level3_mask]); # level3_anchors.shape = (level3 num)
    level3_bbox = tf.keras.layers.Lambda(lambda x: tf.boolean_mask(x[0], x[1]))([valid_bbox, level3_mask]); # level3_bbox.shape = (level3 num, 4)
    level3_labels = tf.keras.layers.Lambda(lambda x: tf.boolean_mask(x[0], x[1]))([valid_labels, level3_mask]); # level3_labels.shape = (level3 num)
    level3_coords = tf.keras.layers.Lambda(lambda x, h, w: tf.clip_by_value(
      tf.cast(
        tf.concat([
          tf.reverse(x[0][..., 0:2], axis = [-1]) * tf.constant([[h // 8, w // 8]], dtype = tf.float32), # shape = (level3 num, 2)
          tf.expand_dims(tf.cast(x[1], dtype = tf.float32), axis = -1) # shape = (level3 num, 1)
        ], axis = -1), dtype = tf.int32), 
      clip_value_min = 0, clip_value_max = [[h//8-1, w//8-1, 2]]), 
      arguments = {'h': img_shape[1], 'w': img_shape[0]})([level3_bbox, level3_anchors]); # level3_outputs.shape = (level3_num, 3) in sequence of (h, w, anchor)
    assert_level3 = tf.debugging.Assert(tf.math.reduce_all(tf.math.logical_and(tf.math.greater_equal(level3_labels, 0), tf.math.less_equal(level3_labels, 79))), [level3_labels]);
    level3_outputs = tf.keras.layers.Lambda(lambda x, c: tf.concat([x[0], tf.ones((tf.shape(x[0])[0], 1), dtype = tf.float32), tf.one_hot(tf.cast(x[1], dtype = tf.int32), c)], axis = -1), arguments = {'c': num_classes})([level3_bbox, level3_labels]); # level3_outputs.shape = (level3_num, 5 + c)
    level3_gt = tf.keras.layers.Lambda(lambda x, h, w, c: tf.scatter_nd(updates = x[0], indices = x[1], shape = (h // 8, w // 8, 3, 5 + c)), arguments = {'h': img_shape[1], 'w': img_shape[0], 'c': num_classes})([level3_outputs, level3_coords]); # level3_gt.shape = (h//8, w//8, 3, 5+c)
    return image, (level1_gt, level2_gt, level3_gt);
  return parse_function;

def parse_function(serialized_example):
  """Decode one serialized sample without any augmentation or label generation.

  Args:
    serialized_example: a serialized tf.train.Example holding the 'image', 'bbox',
      'label' and 'obj_num' features.

  Returns:
    (image, bbox, label): the decoded JPEG image, the boxes reshaped to (obj_num, 4)
    and the labels reshaped to (obj_num,).
  """
  schema = {
    'image': tf.io.FixedLenFeature((), dtype = tf.string),
    'bbox': tf.io.VarLenFeature(dtype = tf.float32),
    'label': tf.io.VarLenFeature(dtype = tf.int64),
    'obj_num': tf.io.FixedLenFeature((), dtype = tf.int64)
  }
  parsed = tf.io.parse_single_example(serialized_example, features = schema)
  # the object count tells how to un-flatten the variable length features
  count = tf.cast(parsed['obj_num'], dtype = tf.int32)
  pixels = tf.io.decode_jpeg(parsed['image'])
  boxes = tf.reshape(tf.sparse.to_dense(parsed['bbox'], default_value = 0), (count, 4))
  classes = tf.reshape(tf.sparse.to_dense(parsed['label'], default_value = 0), [count])
  return pixels, boxes, classes

def create_dataset(image_dir, label_dir, trainset = True):
  """Convert one COCO 2017 split into TFRecord shards using parallel worker processes.

  Args:
    image_dir: directory containing the images of the split.
    label_dir: directory containing instances_train2017.json / instances_val2017.json.
    trainset: True to build the 'trainset' directory from the train annotations,
      False to build the 'testset' directory from the val annotations.

  An existing output directory is removed first. PROCESS_NUM workers are started,
  each writing one shard named '<output dir>_part_<index>'. A worker that ends with
  a non-zero exit code is reported on stdout.
  """
  anno = COCO(join(label_dir, 'instances_train2017.json' if trainset else 'instances_val2017.json'))
  output_dir = 'trainset' if trainset else 'testset'
  if exists(output_dir): rmtree(output_dir)
  mkdir(output_dir)
  # fetch the id list once instead of once per shard
  image_ids = anno.getImgIds()
  imgs_for_each = ceil(len(image_ids) / PROCESS_NUM)
  shards = list()
  for i in range(PROCESS_NUM):
    shard = join(output_dir, '%s_part_%d' % (output_dir, i))
    # slicing past the end of a list is safe, so the last shard needs no special case
    chunk = image_ids[i * imgs_for_each:(i + 1) * imgs_for_each]
    handler = Process(target = worker, args = (shard, anno, image_dir, chunk))
    handler.start()
    shards.append((shard, handler))
  for shard, handler in shards:
    handler.join()
    # a crashed worker leaves a truncated shard behind; make that visible
    if handler.exitcode != 0:
      print('worker for %s exited with code %s, the shard may be incomplete' % (shard, handler.exitcode))

def worker(filename, anno, image_dir, image_ids):
  """Write the given COCO images and their annotations into one TFRecord file.

  Args:
    filename: path of the TFRecord file to create.
    anno: COCO annotation object used to look up image info and annotations.
    image_dir: directory containing the image files named in the annotations.
    image_ids: ids of the images this worker has to process.

  Every record stores the JPEG encoded image, the flattened boxes in sequence of relative
  (upper left y, x, bottom right y, x), the zero based class labels and the object count.
  Images that cannot be read are reported and skipped.
  """
  # the context manager closes the record file even when a sample raises
  with tf.io.TFRecordWriter(filename) as writer:
    for image in image_ids:
      img_info = anno.loadImgs([image])[0]
      height, width = img_info['height'], img_info['width']
      img_path = join(image_dir, img_info['file_name'])
      # NOTE(review): cv2.imread yields BGR channel order and the array is encoded below as is;
      # confirm that consumers of the records expect this channel order
      img = cv2.imread(img_path)
      if img is None:
        print('can\'t open image %s' % (img_path))
        continue
      annIds = anno.getAnnIds(imgIds = image)
      anns = anno.loadAnns(annIds)
      bboxs = list()
      labels = list()
      for ann in anns:
        # bounding box
        bbox_x, bbox_y, bbox_w, bbox_h = ann['bbox']
        # relative upper left y, x, bottom right y, x with respect to the height and width
        bbox = tf.constant([bbox_y / height, bbox_x / width, (bbox_y + bbox_h) / height, (bbox_x + bbox_w) / width], dtype = tf.float32)
        bboxs.append(bbox)
        # category
        category = label_map[ann['category_id']]
        assert category != -1 and category != 0 # can't be not presented category (-1) can't be background category (0)
        # shift 1..80 to the zero based range 0..79
        labels.append(category - 1)
        assert 0 <= labels[-1] <= 79
      obj_num = len(bboxs)
      bboxs = tf.cast(tf.stack(bboxs, axis = 0), dtype = tf.float32) # bboxs.shape = (obj_num, 4)
      labels = tf.cast(tf.stack(labels, axis = 0), dtype = tf.int32) # labels.shape = (obj_num)
      assert labels.shape[0] == bboxs.shape[0] and labels.shape[0] == obj_num
      trainsample = tf.train.Example(features = tf.train.Features(
        feature = {
          'image': tf.train.Feature(bytes_list = tf.train.BytesList(value = [tf.io.encode_jpeg(img).numpy()])),
          'bbox': tf.train.Feature(float_list = tf.train.FloatList(value = tf.reshape(bboxs, (-1)))),
          'label': tf.train.Feature(int64_list = tf.train.Int64List(value = tf.reshape(labels, (-1)))),
          'obj_num': tf.train.Feature(int64_list = tf.train.Int64List(value = [obj_num]))
        }
      ))
      writer.write(trainsample.SerializeToString())

if __name__ == "__main__":

  '''
  # this code is for testing data augmentation
  trainset_filenames = [join('testset', filename) for filename in listdir('testset')];
  trainset = tf.data.TFRecordDataset(trainset_filenames).map(parse_function_generator(80));
  for image, labels in trainset:
    image = image * 255.; # image.shape = (608, 608, 3)
    labels1, labels2, labels3 = labels; # labels1.shape = (13, 13, 3, 85) labels2.shape = (26, 26, 3, 85) labels3.shape = (52, 52, 3, 85)
    mask1 = tf.math.equal(labels1[..., 4], 1); # mask1.shape = (13, 13, 3)
    mask2 = tf.math.equal(labels2[..., 4], 1); # mask2.shape = (26, 26, 3)
    mask3 = tf.math.equal(labels3[..., 4], 1); # msak3.shape = (52, 52, 3)
    labels1 = tf.boolean_mask(labels1, mask1); # labels1.shape = (obj_num, 85)
    labels2 = tf.boolean_mask(labels2, mask2); # labels2.shape = (obj_num, 85)
    labels3 = tf.boolean_mask(labels3, mask3); # labels3.shape = (obj_num, 85)
    bbox = tf.concat([labels1[..., 0:4], labels2[..., 0:4], labels3[..., 0:4]], axis = 0); # bbox.shape = (total obj num, 4) in sequence of (center x, y, w, h)
    half_wh = bbox[..., 2:4] / 2;
    upperleft = (bbox[..., 0:2] - half_wh) * tf.cast([tf.shape(image)[1], tf.shape(image)[0]], dtype = tf.float32); # upperleft.shape = (total obj num, 2)
    bottomright = (bbox[..., 0:2] + half_wh) * tf.cast([tf.shape(image)[1], tf.shape(image)[0]], dtype = tf.float32); # bottomright.shape = (total obj num, 2)
    bbox = tf.concat([upperleft, bottomright], axis = -1); # bbox.shape = (total obj num, 4)
    img = image.numpy().astype('uint8');
    for box in bbox:
      ul = tuple(box[0:2].numpy().astype(int));
      br = tuple(box[2:4].numpy().astype(int));
      cv2.rectangle(img, ul, br, (0,255,0), 2);
    cv2.imshow('objects', img);
    cv2.waitKey();
  exit(0);
  '''

  # command line: <train image dir> <test image dir> <anno dir>
  from sys import argv;
  if len(argv) != 4:
    # a space separates the program name from the first argument placeholder
    print("Usage: " + argv[0] + " <train image dir> <test image dir> <anno dir>");
    exit(1);
  # the dataset writer calls .numpy() on tensors, which needs eager execution
  assert tf.executing_eagerly() == True;
  # build the test split from the test image dir first, then the train split
  create_dataset(argv[2], argv[3], False);
  create_dataset(argv[1], argv[3], True);