|
import argparse
import os
import subprocess

import moxing as mox

# Command-line interface of the launcher.
# data_url and train_url are the fixed arguments used for training on
# ModelArts: the dataset path and the output-model path respectively.
parser = argparse.ArgumentParser(description='PaddleNLP GPT-Finetune Training Example')
parser.add_argument('--data_url',
                    help='path to training/inference dataset folder',
                    default='./label')
parser.add_argument('--train_url',
                    help='model folder to save/load',
                    default='./model')
# The help text is assembled with implicit string concatenation; the former
# backslash-newline inside the literal glued "provide" to the next line.
parser.add_argument('--save_checkpoint_path',
                    type=str,
                    default="./ckpt",
                    help='if is test, must provide '
                         'path where the trained ckpt file')
parser.add_argument('--device_target',
                    type=str,
                    default="Ascend",
                    choices=['Ascend', 'GPU', 'CPU'],
                    help='device where the code will be implemented (default: Ascend)')
parser.add_argument('--multi_data_url',
                    help='path to training/inference dataset folder',
                    default='/cache/data')

# Flags this script does not declare are reported and ignored instead of
# aborting the whole job before any work has started.
# NOTE(review): hosted launchers are known to append extra flags such as
# --init_method; confirm which ones this platform injects.
args, _unknown_args = parser.parse_known_args()
if _unknown_args:
    print("Ignoring unrecognized arguments: {}".format(_unknown_args))

def _run_all(commands):
    """Run every argv-style command and return the ones that failed.

    Commands are started without a shell, and each one is attempted even
    when an earlier one failed, so the script stays best-effort.

    Returns a list of ``(command, outcome)`` pairs where ``outcome`` is the
    non-zero exit code, or the ``OSError`` raised when the executable could
    not be started. An empty list means every command succeeded.
    """
    failures = []
    for command in commands:
        try:
            outcome = subprocess.run(command).returncode
        except OSError as exc:  # e.g. the executable is not on PATH
            outcome = exc
        if outcome != 0:
            failures.append((command, outcome))
    return failures


def _report(failures, success_message, failure_message):
    """Print the stage verdict, followed by one line per failed command."""
    if not failures:
        print(success_message)
        return
    print(failure_message)
    for command, outcome in failures:
        print("  {} -> {}".format(" ".join(command), outcome))


# ---- Stage 1: update paddle and paddle-custom-device-npu -------------------
# Success is only reported when every download and install exits with 0;
# os.system never raised, so the old success message was printed regardless.
_PADDLE_WHEEL = "paddlepaddle-0.0.0-cp37-cp37m-linux_aarch64.whl"
_NPU_WHEEL = "paddle_custom_npu-0.0.0-cp37-cp37m-linux_aarch64.whl"
_report(
    _run_all([
        ["wget", "https://paddle-device.bj.bcebos.com/develop/cpu/" + _PADDLE_WHEEL],
        ["wget", "https://openi.pcl.ac.cn/Never_More/model_test/raw/branch/master/" + _NPU_WHEEL],
        ["pip", "install", "--force-reinstall", "--no-deps", _PADDLE_WHEEL],
        ["pip", "install", "--force-reinstall", _NPU_WHEEL],
    ]),
    "更新最新paddle和paddle-custom成功",
    "更新最新paddle和paddle-custom失败",
)

# ---- Stage 2: download PaddleNLP and configure its environment -------------
# LD_PRELOAD has to live in this process's environment to reach the commands
# started below; `export` inside os.system only changed a throwaway subshell.
# The library is only added when present, so a missing file does not make
# the dynamic loader complain in every child process.
_LIBGOMP = "/usr/lib64/libgomp.so.1"
if os.path.exists(_LIBGOMP):
    os.environ["LD_PRELOAD"] = ":".join(
        entry for entry in (os.environ.get("LD_PRELOAD"), _LIBGOMP) if entry
    )

_PYPI_MIRROR = ["-i", "https://mirror.baidu.com/pypi/simple"]
_nlp_failures = _run_all([
    ["pip", "install", "filelock"] + _PYPI_MIRROR,
    ["git", "clone", "https://openi.pcl.ac.cn/PaddlePaddle/PaddleNLP.git"],
])
try:
    # The working directory stays inside the checkout; stage 3 relies on it.
    os.chdir('./PaddleNLP')
except OSError as exc:
    _nlp_failures.append((["cd", "./PaddleNLP"], exc))
else:
    _nlp_failures += _run_all([
        ["pip", "install", "--upgrade", "-r", "requirements.txt"] + _PYPI_MIRROR,
        ["python", "setup.py", "install"],
        # Passed as one argv entry: in a shell string the unquoted ">=" was
        # a redirect to a file named "=0.1.94", dropping the constraint.
        ["pip", "install", "scikit-learn==1.0.2", "scipy==1.7.3", "regex",
         "sentencepiece>=0.1.94", "tqdm", "visualdl", "pybind11"] + _PYPI_MIRROR,
    ])
_report(_nlp_failures, "依赖安装成功", "依赖修复失败")

# ---- Stage 3: run the training script --------------------------------------
_TRAIN_COMMAND = [
    "python", "run_glue.py",
    "--model_name_or_path", "gpt2-medium-en",
    "--task_name", "SST-2",
    "--max_seq_length", "128",
    "--per_device_train_batch_size", "32",
    "--learning_rate", "2e-5",
    "--num_train_epochs", "3",
    "--logging_steps", "1",
    "--save_steps", "500",
    "--save_total_limit", "4",
    "--output_dir", "./output_dir/glue",
    "--eval_steps", "500",
    "--device", "npu",
    "--do_train", "true",
    "--do_eval", "true",
    "--flatten_param_grads",
    "--lazy_data_processing=False",
]
try:
    os.chdir("./model_zoo/gpt")
except OSError as exc:
    _train_failures = [(["cd", "./model_zoo/gpt"], exc)]
else:
    _train_failures = _run_all([_TRAIN_COMMAND])
_report(_train_failures, "运行成功", "运行失败")

# ---------- Copy the output model to OBS (fixed boilerplate) ----------
# Ship the post-training data from the local runtime back to OBS; the OpenI
# platform then offers it for download on the matching training task.
# NOTE(review): nothing in the visible script writes train.log, and the
# source path is a single file handed to copy_parallel; confirm both.
try:
    train_dir = "".join((os.getcwd(), "/train.log"))
    mox.file.copy_parallel(train_dir, args.train_url)
    upload_ok = "Successfully Upload {} to {}"
    print(upload_ok.format(train_dir, args.train_url))
except Exception as err:
    upload_failed = 'moxing upload {} to {} failed: '
    print(upload_failed.format(train_dir, args.train_url) + str(err))
|