#6 master

Merged
lemon merged 2 commits from jingmohan/mindface_retinaface_resnet50:master into master 1 year ago
  1. +8
    -7
      configs/RetinaFace_resnet50.yaml
  2. +52
    -13
      eval.py
  3. +1
    -1
      models/retinaface.py
  4. BIN
      pretrained/resnet50_ascend_v170_imagenet2012_official_cv_top1acc76.97_top5acc93.44.ckpt
  5. +10
    -82
      train.py
  6. +80
    -0
      utils/qizhi_config.py

+ 8
- 7
configs/RetinaFace_resnet50.yaml View File

@@ -21,6 +21,7 @@
'num_classes' : 2
'device_id': 0
'grad_clip': True
'device_id': 0

# opt
'optim': 'sgd'
@@ -42,23 +43,23 @@
'gamma': 0.1

# checkpoint
'ckpt_path': './resnet_graph/'
'save_checkpoint_steps': 1608
'keep_checkpoint_max': 10
'ckpt_path': '/cache/output/'
'save_checkpoint_steps': 804
'keep_checkpoint_max': 50
'resume_net': ~


# dataset
'training_dataset': 'data/WiderFace/train/label.txt'
'training_dataset': '/data/WiderFace/train/label.txt'
'pretrain': True
'pretrain_path': 'pretrained/resnet50_ascend_v170_imagenet2012_official_cv_top1acc76.97_top5acc93.44.ckpt'
'pretrain_path': '/resnet50_ascend_v170_imagenet2012_official_cv_top1acc76.97_top5acc93.44.ckpt'

# val
'val_model': 'RetinaFace.ckpt'
'val_dataset_folder': 'data/WiderFace/val/'
'val_dataset_folder': '/data/WiderFace/val/'
'val_origin_size': False
'val_confidence_threshold': 0.02
'val_nms_threshold': 0.4
'val_iou_threshold': 0.5
'val_save_result': False
'val_gt_dir': 'data/WiderFace/ground_truth'
'val_gt_dir': '/data/WiderFace/ground_truth'

+ 52
- 13
eval.py View File

@@ -20,12 +20,13 @@ import cv2
from mindspore import Tensor, context
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from mindspore import ops
from utils.qizhi_config import *

from utils import prior_box
from models import RetinaFace, resnet50, mobilenet025
from runner import DetectionEngine, Timer, read_yaml

def val(cfg):
def val(cfg,local_path):
"""val"""
if cfg['mode'] == 'Graph':
context.set_context(mode=context.GRAPH_MODE, device_target=cfg['device_target'])
@@ -49,8 +50,8 @@ def val(cfg):
load_param_into_net(network, param_dict)

# testing dataset
testset_folder = cfg['val_dataset_folder']
testset_label_path = cfg['val_dataset_folder'] + "label.txt"
testset_folder = local_path+cfg['val_dataset_folder']
testset_label_path = local_path+cfg['val_dataset_folder'] + "label.txt"
with open(testset_label_path, 'r', encoding = 'utf-8') as file:
all_test_dataset = file.readlines()
test_dataset = []
@@ -89,7 +90,7 @@ def val(cfg):

# init detection engine
detection = DetectionEngine(nms_thresh=cfg['val_nms_threshold'], conf_thresh=cfg['val_confidence_threshold'],
iou_thresh=cfg['val_iou_threshold'], var=cfg['variance'], gt_dir=cfg['val_gt_dir'])
iou_thresh=cfg['val_iou_threshold'], var=cfg['variance'], gt_dir=local_path+cfg['val_gt_dir'])


# testing begin
@@ -144,8 +145,12 @@ def val(cfg):
confs_all = confs
resize_all = [resize]
else:
boxes_all = ops.concat((boxes_all,boxes))
confs_all = ops.concat((confs_all,confs),axis=1)
op = ops.Concat()
op1 = ops.Concat(1)
boxes_all = op((boxes_all, boxes))
confs_all = op1((confs_all,confs))
# boxes_all = ops.concat((boxes_all,boxes))
# confs_all = ops.concat((confs_all,confs),axis=1)
resize_all.append(resize)
timers['forward_time'].end()

@@ -183,14 +188,48 @@ def val(cfg):
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='val')
# configs
parser.add_argument('--config', default='mindface/detection/configs/RetinaFace_mobilenet025.yaml', type=str,
help='configs path')
parser.add_argument('--checkpoint', type=str, default='',
local_path = os.path.abspath(__file__)[:-8]
parser.add_argument('--ckpt_url', type=str, default='',
help='checpoint path')

# configs
parser.add_argument('--train_url',
help='output folder to save/load',
default= '/cache/output/')

parser.add_argument('--result_url',
help='output folder to save/load',
default= '/cache/output/')

parser.add_argument('--data_url',
help='path to training/inference dataset folder',
default= '/cache/data/')


parser.add_argument(
'--device_target',
type=str,
default="Ascend",
choices=['Ascend', 'CPU'],
help='device where the code will be implemented (default: Ascend),if to use the CPU on the Qizhi platform:device_target=CPU')


parser.add_argument('--config', default='RetinaFace_mobilenet025.yaml', type=str ,help='config path')

args = parser.parse_args()

config = read_yaml(args.config)

if args.checkpoint:
config['val_model'] = args.checkpoint
val(cfg=config)
###Initialize and copy data to training image
DownloadFromQizhi(args.data_url, data_dir=local_path+'/data')
DownloadFromQizhi(args.ckpt_url, data_dir=local_path+'/'+args.ckpt_url.split('/')[-1])
###The dataset path is used here:data_dir +"/train"

print(local_path+'/'+args.ckpt_url.split('/')[-1])
config = read_yaml(local_path + '/configs/' + args.config)

if args.ckpt_url:
config['val_model'] = local_path+'/'+args.ckpt_url.split('/')[-1]
val(cfg=config, local_path=local_path)

+ 1
- 1
models/retinaface.py View File

@@ -84,7 +84,7 @@ class ConvBNReLU(nn.SequentialCell):
nn.Conv2d(in_planes, out_planes, kernel_size, stride, pad_mode='pad', padding=padding, group=groups,
has_bias=False, weight_init=kaiming_weight),
norm_layer(out_planes),
# nn.LeakyReLU(alpha=leaky)
#nn.LeakyReLU(alpha=leaky)
nn.ReLU()
)



BIN
pretrained/resnet50_ascend_v170_imagenet2012_official_cv_top1acc76.97_top5acc93.44.ckpt View File


+ 10
- 82
train.py View File

@@ -27,87 +27,11 @@ from mindspore.train.serialization import load_checkpoint, load_param_into_net
from loss import MultiBoxLoss
from datasets import create_dataset
from utils import adjust_learning_rate
from utils.qizhi_config import *

from models import RetinaFace, RetinaFaceWithLossCell, resnet50, mobilenet025
from models import RetinaFace, RetinaFaceWithLossCell, resnet50
from runner import read_yaml, TrainingWrapper

from mindspore.context import ParallelMode
import mindspore.ops as ops
import time
import moxing as mox
from mindspore.train.callback import Callback
import os
import sys

ab_path = '/home/work/user-job-dir/V0001'

class UploadOutput(Callback):
    """Callback that copies the local output directory to OBS after each epoch.

    Args:
        train_dir: Local directory whose contents are uploaded.
        obs_train_url: Destination OBS URL that receives the copy.
    """

    def __init__(self, train_dir, obs_train_url):
        # Initialise the Callback base class before adding our own state.
        super().__init__()
        self.train_dir = train_dir
        self.obs_train_url = obs_train_url

    def epoch_end(self, run_context):
        """Copy ``train_dir`` to ``obs_train_url`` with ``mox.file.copy_parallel``.

        Args:
            run_context: Context object handed to the hook by its caller;
                it is not used here.
        """
        try:
            mox.file.copy_parallel(self.train_dir, self.obs_train_url)
            print("Successfully Upload {} to {}".format(self.train_dir, self.obs_train_url))
        except Exception as e:
            # Best-effort: the failure is reported and swallowed so the
            # caller keeps running.
            print('moxing upload {} to {} failed: '.format(self.train_dir, self.obs_train_url) + str(e))

### Copy single dataset from obs to training image###
def ObsToEnv(obs_data_url, data_dir):
    """Copy a dataset from OBS into the local environment and write a marker file.

    The copy is best-effort: any exception raised by
    ``mox.file.copy_parallel`` is printed and swallowed. Afterwards the empty
    file ``/cache/download_input.txt`` is (re)created regardless of whether
    the copy succeeded.

    Args:
        obs_data_url: Source OBS URL.
        data_dir: Local destination directory.
    """
    try:
        mox.file.copy_parallel(obs_data_url, data_dir)
        print("Successfully Download {} to {}".format(obs_data_url, data_dir))
    except Exception as e:
        print('moxing download {} to {} failed: '.format(obs_data_url, data_dir) + str(e))
    # Set a cache file to determine whether the data has been copied from obs.
    # If this file exists during multi-card training, there is no need to copy
    # the dataset multiple times.
    marker_path = "/cache/download_input.txt"
    # The context manager guarantees the handle is closed even on error.
    with open(marker_path, 'w'):
        pass
    # os.path.exists does not raise for a plain string path, so the failure
    # message has to live in an explicit else branch to be reachable at all.
    if os.path.exists(marker_path):
        print("download_input succeed")
    else:
        print("download_input failed")
### Copy the output to obs###
def EnvToObs(train_dir, obs_train_url):
    """Upload the local output directory to OBS, reporting the outcome on stdout.

    Any exception raised during the copy is printed and swallowed.

    Args:
        train_dir: Local directory to upload.
        obs_train_url: Destination OBS URL.
    """
    try:
        mox.file.copy_parallel(train_dir, obs_train_url)
        success_msg = "Successfully Upload {} to {}".format(train_dir, obs_train_url)
        print(success_msg)
    except Exception as err:
        failure_prefix = 'moxing upload {} to {} failed: '.format(train_dir, obs_train_url)
        print(failure_prefix + str(err))
def DownloadFromQizhi(obs_data_url, data_dir):
    """Download the dataset from OBS when running on a single device.

    The device count comes from the ``RANK_SIZE`` environment variable and is
    treated as 1 when the variable is unset, so a plain single-process launch
    does not fail with ``TypeError`` from ``int(None)``.

    Args:
        obs_data_url: Source OBS URL of the dataset.
        data_dir: Local directory the dataset is copied into.
    """
    device_num = int(os.getenv('RANK_SIZE', '1'))
    if device_num == 1:
        ObsToEnv(obs_data_url, data_dir)
        # context.set_context(mode=context.GRAPH_MODE,device_target=args.device_target)
    # NOTE(review): the multi-card path below is disabled, so nothing is
    # downloaded when RANK_SIZE > 1 -- confirm this is intended.
    # if device_num > 1:
    #     # set device_id and init for multi-card training
    #     # context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, device_id=int(os.getenv('ASCEND_DEVICE_ID')))
    #     # context.reset_auto_parallel_context()
    #     # context.set_auto_parallel_context(device_num = device_num, parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True, parameter_broadcast=True)
    #     # init()
    #     # Copying obs data does not need to be executed multiple times, just let the 0th card copy the data
    #     local_rank=int(os.getenv('RANK_ID'))
    #     if local_rank%8==0:
    #         ObsToEnv(obs_data_url,data_dir)
    #     # If the cache file does not exist, it means that the copy data has not been completed,
    #     # and Wait for 0th card to finish copying data
    #     while not os.path.exists("/cache/download_input.txt"):
    #         time.sleep(1)
    # return
def UploadToQizhi(train_dir, obs_train_url):
    """Upload the output directory to OBS from the device responsible for it.

    ``RANK_SIZE`` (device count) and ``RANK_ID`` (rank of this process) are
    read from the environment and default to 1 and 0 when unset, so a plain
    single-process launch does not fail with ``TypeError`` from ``int(None)``.

    Args:
        train_dir: Local directory to upload.
        obs_train_url: Destination OBS URL.
    """
    device_num = int(os.getenv('RANK_SIZE', '1'))
    local_rank = int(os.getenv('RANK_ID', '0'))
    # A single device always uploads; with several devices only the ranks
    # that are a multiple of 8 do, so the copy is not repeated by every rank.
    if device_num == 1 or (device_num > 1 and local_rank % 8 == 0):
        EnvToObs(train_dir, obs_train_url)

def train(cfg,args):
"""train"""
mindspore.common.seed.set_seed(cfg['seed'])
@@ -137,6 +61,8 @@ def train(cfg,args):
rank = get_rank()
print(f"The rank ID of current device is {rank}.")



batch_size = cfg['batch_size']
max_epoch = cfg['epoch']
clip = cfg['clip']
@@ -145,7 +71,7 @@ def train(cfg,args):
weight_decay = cfg['weight_decay']
initial_lr = cfg['initial_lr']
gamma = cfg['gamma']
training_dataset = args.local_path + '/' + cfg['training_dataset']
training_dataset = args.local_path + cfg['training_dataset']
num_classes = cfg['num_classes']
negative_ratio = 7
stepvalues = (cfg['decay1'], cfg['decay2'])
@@ -241,7 +167,8 @@ if __name__ == '__main__':

parser.add_argument('--local_path', help='local_path', default= local_path)

args = parser.parse_args()

print(args.local_path)
@@ -257,7 +184,8 @@ if __name__ == '__main__':
DownloadFromQizhi(args.data_url, data_dir=args.local_path+'/data')
###The dataset path is used here:data_dir +"/train"


config = read_yaml(local_path + '/configs/' + args.config)
train(cfg=config, args=args)
train(cfg=config,args =args)

UploadToQizhi(train_dir,args.train_url)
UploadToQizhi(train_dir,args.train_url)

+ 80
- 0
utils/qizhi_config.py View File

@@ -0,0 +1,80 @@
import argparse
import math
import mindspore

from runner import read_yaml, TrainingWrapper

from mindspore.context import ParallelMode
import mindspore.ops as ops
import time
import moxing as mox
from mindspore.train.callback import Callback
import os
import sys

class UploadOutput(Callback):
    """Callback that copies the local output directory to OBS after each epoch.

    Args:
        train_dir: Local directory whose contents are uploaded.
        obs_train_url: Destination OBS URL that receives the copy.
    """

    def __init__(self, train_dir, obs_train_url):
        # Initialise the Callback base class before adding our own state.
        super().__init__()
        self.train_dir = train_dir
        self.obs_train_url = obs_train_url

    def epoch_end(self, run_context):
        """Copy ``train_dir`` to ``obs_train_url`` with ``mox.file.copy_parallel``.

        Args:
            run_context: Context object handed to the hook by its caller;
                it is not used here.
        """
        try:
            mox.file.copy_parallel(self.train_dir, self.obs_train_url)
            print("Successfully Upload {} to {}".format(self.train_dir, self.obs_train_url))
        except Exception as e:
            # Best-effort: the failure is reported and swallowed so the
            # caller keeps running.
            print('moxing upload {} to {} failed: '.format(self.train_dir, self.obs_train_url) + str(e))

### Copy single dataset from obs to training image###
def ObsToEnv(obs_data_url, data_dir):
    """Copy a dataset from OBS into the local environment and write a marker file.

    The copy is best-effort: any exception raised by
    ``mox.file.copy_parallel`` is printed and swallowed. Afterwards the empty
    file ``/cache/download_input.txt`` is (re)created regardless of whether
    the copy succeeded.

    Args:
        obs_data_url: Source OBS URL.
        data_dir: Local destination directory.
    """
    try:
        mox.file.copy_parallel(obs_data_url, data_dir)
        print("Successfully Download {} to {}".format(obs_data_url, data_dir))
    except Exception as e:
        print('moxing download {} to {} failed: '.format(obs_data_url, data_dir) + str(e))
    # Set a cache file to determine whether the data has been copied from obs.
    # If this file exists during multi-card training, there is no need to copy
    # the dataset multiple times.
    marker_path = "/cache/download_input.txt"
    # The context manager guarantees the handle is closed even on error.
    with open(marker_path, 'w'):
        pass
    # os.path.exists does not raise for a plain string path, so the failure
    # message has to live in an explicit else branch to be reachable at all.
    if os.path.exists(marker_path):
        print("download_input succeed")
    else:
        print("download_input failed")
### Copy the output to obs###
def EnvToObs(train_dir, obs_train_url):
    """Upload the local output directory to OBS, reporting the outcome on stdout.

    Any exception raised during the copy is printed and swallowed.

    Args:
        train_dir: Local directory to upload.
        obs_train_url: Destination OBS URL.
    """
    try:
        mox.file.copy_parallel(train_dir, obs_train_url)
        success_msg = "Successfully Upload {} to {}".format(train_dir, obs_train_url)
        print(success_msg)
    except Exception as err:
        failure_prefix = 'moxing upload {} to {} failed: '.format(train_dir, obs_train_url)
        print(failure_prefix + str(err))
def DownloadFromQizhi(obs_data_url, data_dir):
    """Download the dataset from OBS when running on a single device.

    The device count comes from the ``RANK_SIZE`` environment variable and is
    treated as 1 when the variable is unset, so a plain single-process launch
    does not fail with ``TypeError`` from ``int(None)``.

    Args:
        obs_data_url: Source OBS URL of the dataset.
        data_dir: Local directory the dataset is copied into.
    """
    device_num = int(os.getenv('RANK_SIZE', '1'))
    if device_num == 1:
        ObsToEnv(obs_data_url, data_dir)
        # context.set_context(mode=context.GRAPH_MODE,device_target=args.device_target)
    # NOTE(review): the multi-card path below is disabled, so nothing is
    # downloaded when RANK_SIZE > 1 -- confirm this is intended.
    # if device_num > 1:
    #     # set device_id and init for multi-card training
    #     # context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, device_id=int(os.getenv('ASCEND_DEVICE_ID')))
    #     # context.reset_auto_parallel_context()
    #     # context.set_auto_parallel_context(device_num = device_num, parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True, parameter_broadcast=True)
    #     # init()
    #     # Copying obs data does not need to be executed multiple times, just let the 0th card copy the data
    #     local_rank=int(os.getenv('RANK_ID'))
    #     if local_rank%8==0:
    #         ObsToEnv(obs_data_url,data_dir)
    #     # If the cache file does not exist, it means that the copy data has not been completed,
    #     # and Wait for 0th card to finish copying data
    #     while not os.path.exists("/cache/download_input.txt"):
    #         time.sleep(1)
    # return
def UploadToQizhi(train_dir, obs_train_url):
    """Upload the output directory to OBS from the device responsible for it.

    ``RANK_SIZE`` (device count) and ``RANK_ID`` (rank of this process) are
    read from the environment and default to 1 and 0 when unset, so a plain
    single-process launch does not fail with ``TypeError`` from ``int(None)``.

    Args:
        train_dir: Local directory to upload.
        obs_train_url: Destination OBS URL.
    """
    device_num = int(os.getenv('RANK_SIZE', '1'))
    local_rank = int(os.getenv('RANK_ID', '0'))
    # A single device always uploads; with several devices only the ranks
    # that are a multiple of 8 do, so the copy is not repeated by every rank.
    if device_num == 1 or (device_num > 1 and local_rank % 8 == 0):
        EnvToObs(train_dir, obs_train_url)

Loading…
Cancel
Save