From 3d0647c410d694d66f231a3a3bb34184eed89b0a Mon Sep 17 00:00:00 2001 From: lemon Date: Sat, 28 Jan 2023 14:03:19 +0800 Subject: [PATCH 1/4] =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E8=87=B3=20'utils'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- utils/qizhi_config.py | 80 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 utils/qizhi_config.py diff --git a/utils/qizhi_config.py b/utils/qizhi_config.py new file mode 100644 index 0000000..8adc8bd --- /dev/null +++ b/utils/qizhi_config.py @@ -0,0 +1,80 @@ +import argparse +import math +import mindspore + +from runner import read_yaml, TrainingWrapper + +from mindspore.context import ParallelMode +import mindspore.ops as ops +import time +import moxing as mox +from mindspore.train.callback import Callback +import os +import sys + +class UploadOutput(Callback): + def __init__(self, train_dir, obs_train_url): + self.train_dir = train_dir + self.obs_train_url = obs_train_url + def epoch_end(self,run_context): + try: + mox.file.copy_parallel(self.train_dir , self.obs_train_url ) + print("Successfully Upload {} to {}".format(self.train_dir ,self.obs_train_url )) + except Exception as e: + print('moxing upload {} to {} failed: '.format(self.train_dir ,self.obs_train_url ) + str(e)) + return + +### Copy single dataset from obs to training image### +def ObsToEnv(obs_data_url, data_dir): + try: + mox.file.copy_parallel(obs_data_url, data_dir) + print("Successfully Download {} to {}".format(obs_data_url, data_dir)) + except Exception as e: + print('moxing download {} to {} failed: '.format(obs_data_url, data_dir) + str(e)) + #Set a cache file to determine whether the data has been copied to obs. + #If this file exists during multi-card training, there is no need to copy the dataset multiple times. + f = open("/cache/download_input.txt", 'w') + f.close() + try: + if os.path.exists("/cache/download_input.txt"): + print("download_input succeed") + except Exception as e: + print("download_input failed") + return +### Copy the output to obs### +def EnvToObs(train_dir, obs_train_url): + try: + mox.file.copy_parallel(train_dir, obs_train_url) + print("Successfully Upload {} to {}".format(train_dir,obs_train_url)) + except Exception as e: + print('moxing upload {} to {} failed: '.format(train_dir,obs_train_url) + str(e)) + return +def DownloadFromQizhi(obs_data_url, data_dir): + device_num = int(os.getenv('RANK_SIZE')) + if device_num == 1: + ObsToEnv(obs_data_url,data_dir) + # context.set_context(mode=context.GRAPH_MODE,device_target=args.device_target) + # if device_num > 1: + # # set device_id and init for multi-card training + # # context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, device_id=int(os.getenv('ASCEND_DEVICE_ID'))) + # # context.reset_auto_parallel_context() + # # context.set_auto_parallel_context(device_num = device_num, parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True, parameter_broadcast=True) + # # init() + # #Copying obs data does not need to be executed multiple times, just let the 0th card copy the data + # local_rank=int(os.getenv('RANK_ID')) + # if local_rank%8==0: + # ObsToEnv(obs_data_url,data_dir) + # #If the cache file does not exist, it means that the copy data has not been completed, + # #and Wait for 0th card to finish copying data + # while not os.path.exists("/cache/download_input.txt"): + # time.sleep(1) + # return +def UploadToQizhi(train_dir, obs_train_url): + device_num = int(os.getenv('RANK_SIZE')) + local_rank=int(os.getenv('RANK_ID')) + if device_num == 1: + EnvToObs(train_dir, obs_train_url) + if device_num > 1: + if local_rank%8==0: + EnvToObs(train_dir, obs_train_url) + return \ No newline at end of file -- 2.34.1 From 7db8838cce93f73a2b4247f23de245723e21cc41 Mon Sep 17 00:00:00 2001 From: lemon Date: Sat, 28 Jan 2023 14:31:13 +0800 Subject: [PATCH 2/4] =?UTF-8?q?=E5=88=A0=E9=99=A4=20'utils/qizhi=5Fconfig.?= =?UTF-8?q?py'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- utils/qizhi_config.py | 80 ------------------------------------------- 1 file changed, 80 deletions(-) delete mode 100644 utils/qizhi_config.py diff --git a/utils/qizhi_config.py b/utils/qizhi_config.py deleted file mode 100644 index 8adc8bd..0000000 --- a/utils/qizhi_config.py +++ /dev/null @@ -1,80 +0,0 @@ -import argparse -import math -import mindspore - -from runner import read_yaml, TrainingWrapper - -from mindspore.context import ParallelMode -import mindspore.ops as ops -import time -import moxing as mox -from mindspore.train.callback import Callback -import os -import sys - -class UploadOutput(Callback): - def __init__(self, train_dir, obs_train_url): - self.train_dir = train_dir - self.obs_train_url = obs_train_url - def epoch_end(self,run_context): - try: - mox.file.copy_parallel(self.train_dir , self.obs_train_url ) - print("Successfully Upload {} to {}".format(self.train_dir ,self.obs_train_url )) - except Exception as e: - print('moxing upload {} to {} failed: '.format(self.train_dir ,self.obs_train_url ) + str(e)) - return - -### Copy single dataset from obs to training image### -def ObsToEnv(obs_data_url, data_dir): - try: - mox.file.copy_parallel(obs_data_url, data_dir) - print("Successfully Download {} to {}".format(obs_data_url, data_dir)) - except Exception as e: - print('moxing download {} to {} failed: '.format(obs_data_url, data_dir) + str(e)) - #Set a cache file to determine whether the data has been copied to obs. - #If this file exists during multi-card training, there is no need to copy the dataset multiple times. - f = open("/cache/download_input.txt", 'w') - f.close() - try: - if os.path.exists("/cache/download_input.txt"): - print("download_input succeed") - except Exception as e: - print("download_input failed") - return -### Copy the output to obs### -def EnvToObs(train_dir, obs_train_url): - try: - mox.file.copy_parallel(train_dir, obs_train_url) - print("Successfully Upload {} to {}".format(train_dir,obs_train_url)) - except Exception as e: - print('moxing upload {} to {} failed: '.format(train_dir,obs_train_url) + str(e)) - return -def DownloadFromQizhi(obs_data_url, data_dir): - device_num = int(os.getenv('RANK_SIZE')) - if device_num == 1: - ObsToEnv(obs_data_url,data_dir) - # context.set_context(mode=context.GRAPH_MODE,device_target=args.device_target) - # if device_num > 1: - # # set device_id and init for multi-card training - # # context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, device_id=int(os.getenv('ASCEND_DEVICE_ID'))) - # # context.reset_auto_parallel_context() - # # context.set_auto_parallel_context(device_num = device_num, parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True, parameter_broadcast=True) - # # init() - # #Copying obs data does not need to be executed multiple times, just let the 0th card copy the data - # local_rank=int(os.getenv('RANK_ID')) - # if local_rank%8==0: - # ObsToEnv(obs_data_url,data_dir) - # #If the cache file does not exist, it means that the copy data has not been completed, - # #and Wait for 0th card to finish copying data - # while not os.path.exists("/cache/download_input.txt"): - # time.sleep(1) - # return -def UploadToQizhi(train_dir, obs_train_url): - device_num = int(os.getenv('RANK_SIZE')) - local_rank=int(os.getenv('RANK_ID')) - if device_num == 1: - EnvToObs(train_dir, obs_train_url) - if device_num > 1: - if local_rank%8==0: - EnvToObs(train_dir, obs_train_url) - return \ No newline at end of file -- 2.34.1 From 2587a567a46393c7f49a8c9ad1ccc9f115fa23cd Mon Sep 17 00:00:00 2001 From: lemon Date: Sun, 29 Jan 2023 17:38:26 +0800 Subject: [PATCH 3/4] =?UTF-8?q?=E6=9B=B4=E6=96=B0=20'eval.py'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- eval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eval.py b/eval.py index d9ba1cc..abe2b66 100644 --- a/eval.py +++ b/eval.py @@ -215,7 +215,7 @@ if __name__ == '__main__': help='device where the code will be implemented (default: Ascend),if to use the CPU on the Qizhi platform:device_target=CPU') - parser.add_argument('--config', default='RetinaFace_mobilenet025.yaml', type=str ,help='config path') + parser.add_argument('--config', default='RetinaFace_resnet50.yaml', type=str ,help='config path') args = parser.parse_args() -- 2.34.1 From 0c59f0b91385384b5abaf1643f45ac0847f3f5d7 Mon Sep 17 00:00:00 2001 From: lemon Date: Sun, 29 Jan 2023 17:38:54 +0800 Subject: [PATCH 4/4] =?UTF-8?q?=E6=9B=B4=E6=96=B0=20'infer.py'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- infer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infer.py b/infer.py index cb7e30e..1feab6a 100644 --- a/infer.py +++ b/infer.py @@ -130,7 +130,7 @@ def infer(cfg): if __name__ == '__main__': parser = argparse.ArgumentParser(description='infer') - parser.add_argument('--config', default='mindface/detection/configs/RetinaFace_mobilenet025.yaml', type=str, + parser.add_argument('--config', default='mindface/detection/configs/RetinaFace_resnet50.yaml', type=str, help='configs path') parser.add_argument('--checkpoint', type=str, default='', help='checpoint path') -- 2.34.1