# CYing / mmllt
# forked from Never_More/model_test

import argparse
import os
import subprocess

import moxing as mox

# Command-line interface of the training launcher.
parser = argparse.ArgumentParser(description='PaddleNLP ERNIE-tiny-Finetune Training Example')
# data_url and train_url are the fixed arguments used when training on
# ModelArts: the dataset path and the output-model path respectively.
parser.add_argument('--data_url',
                    help='path to training/inference dataset folder',
                    default='./label')

parser.add_argument('--train_url',
                    help='model folder to save/load',
                    default='./model')


parser.add_argument('--save_checkpoint_path',
                    type=str,
                    default="./ckpt",
                    help='if is test, must provide\
                    path where the trained ckpt file')
parser.add_argument(
    '--device_target',
    type=str,
    default="Ascend",
    choices=['Ascend', 'GPU', 'CPU'],
    help='device where the code will be implemented (default: Ascend)')

parser.add_argument(
    '--multi_data_url',
    help='path to training/inference dataset folder',
    default='/cache/data'
)
# parse_known_args() is used instead of parse_args() so that any extra flag
# the job launcher passes (NOTE(review): presumably the training platform may
# inject some - confirm) does not abort the whole job with SystemExit.
# Unrecognized arguments are reported rather than silently dropped so that
# typos in flag names remain visible in the job log.
args, unknown_args = parser.parse_known_args()
if unknown_args:
    print("Ignoring unrecognized arguments: {}".format(unknown_args))

# Refresh paddlepaddle and paddle-custom-device-npu from prebuilt
# cp37 / linux_aarch64 wheels.
#
# subprocess.run(..., check=True) raises CalledProcessError on a non-zero exit
# status (and OSError when the executable is missing), so the failure branch
# below is actually reachable; os.system() only returns the status and never
# raises. The step stays best-effort: a failure is reported and the script
# carries on.
try:
    for wheel_url in (
        "https://paddle-device.bj.bcebos.com/develop/cpu/paddlepaddle-0.0.0-cp37-cp37m-linux_aarch64.whl",
        "https://openi.pcl.ac.cn/Never_More/model_test/raw/branch/master/paddle_custom_npu-0.0.0-cp37-cp37m-linux_aarch64.whl",
    ):
        subprocess.run(["wget", wheel_url], check=True)
    # paddlepaddle is installed with --no-deps; the custom NPU plugin is not.
    subprocess.run(
        ["pip", "install", "--force-reinstall", "--no-deps",
         "paddlepaddle-0.0.0-cp37-cp37m-linux_aarch64.whl"],
        check=True,
    )
    subprocess.run(
        ["pip", "install", "--force-reinstall",
         "paddle_custom_npu-0.0.0-cp37-cp37m-linux_aarch64.whl"],
        check=True,
    )
    print("更新最新paddle和paddle-custom成功")
except (OSError, subprocess.CalledProcessError) as err:
    print("更新最新paddle和paddle-custom失败")
    print(err)

# Download PaddleNLP and set up its environment.
#
# Side effect relied on by the later steps: on success the working directory
# is left inside ./PaddleNLP.
try:
    # Exporting LD_PRELOAD through os.system() only changes a throw-away
    # subshell. Setting it in os.environ makes every child process started
    # afterwards inherit it. The library is only added when it exists, so
    # hosts without it do not get a preload warning on every command.
    libgomp_path = "/usr/lib64/libgomp.so.1"
    if os.path.exists(libgomp_path):
        preload_entries = [
            entry
            for entry in (os.environ.get("LD_PRELOAD", ""), libgomp_path)
            if entry
        ]
        os.environ["LD_PRELOAD"] = ":".join(preload_entries)

    pypi_mirror = "https://mirror.baidu.com/pypi/simple"
    subprocess.run(["pip", "install", "filelock", "-i", pypi_mirror], check=True)
    subprocess.run(
        ["git", "clone", "https://openi.pcl.ac.cn/PaddlePaddle/PaddleNLP.git"],
        check=True,
    )
    os.chdir('./PaddleNLP')
    subprocess.run(
        ["pip", "install", "--upgrade", "-r", "requirements.txt", "-i", pypi_mirror],
        check=True,
    )
    subprocess.run(["python", "setup.py", "install"], check=True)
    # Passing the requirement as a list element keeps "paddleslim>=2.4" intact;
    # in an unquoted shell string the ">" is parsed as an output redirection to
    # a file named "=2.4" and the version constraint is lost.
    subprocess.run(
        ["pip", "install", "scikit-learn==1.0.2", "scipy==1.7.3",
         "paddleslim>=2.4", "-i", pypi_mirror],
        check=True,
    )
    print("依赖安装成功")
except (OSError, subprocess.CalledProcessError) as err:
    # OSError covers both a missing executable and a failed os.chdir().
    print("依赖修复失败")
    print(err)

# Download the corpus and split it into dev / train sets.
#
# Side effect relied on by the later steps: the working directory is changed
# to ./model_zoo/ernie-tiny and stays there.
try:
    os.chdir("./model_zoo/ernie-tiny")
    subprocess.run(
        ["wget", "http://tcci.ccf.org.cn/conference/2018/dldoc/trainingdata04.zip"],
        check=True,
    )
    subprocess.run(["unzip", "trainingdata04.zip"], check=True)
    subprocess.run(
        ["cp", "task-slu-tencent.dingdang-v1.1/corpus.train.txt", "./data/"],
        check=True,
    )
    # The commands below run with cwd="./data" instead of chdir-ing in and
    # back out, so a failure cannot leave the process in the wrong directory.
    subprocess.run(
        "shuf corpus.train.txt > corpus.train.txt.shuf",
        shell=True, check=True, cwd="./data",
    )
    # First fifth of the shuffled corpus -> dev.txt, the remainder -> train.txt.
    # POSIX $((...)) arithmetic replaces the bash-only, deprecated $[...] form,
    # which fails when /bin/sh is not bash.
    split_command = (
        "num_lines=$(wc -l corpus.train.txt|awk '{print $1}')"
        " && head -n $((num_lines/5)) corpus.train.txt.shuf > dev.txt"
        " && tail -n $((num_lines-num_lines/5)) corpus.train.txt.shuf > train.txt"
    )
    subprocess.run(split_command, shell=True, check=True, cwd="./data")
    print("数据集构建成功")
except (OSError, subprocess.CalledProcessError) as err:
    print("数据集构建失败")
    print(err)

# Run the training script (run_train.py in the current working directory).
#
# check=True makes a non-zero exit status of the training process raise, so
# the success message is only printed when training actually succeeded.
try:
    output_dir = "output/BS64_LR5e-5_EPOCHS30"
    os.makedirs(output_dir, exist_ok=True)

    train_command = [
        "python", "run_train.py",
        "--device", "npu",
        "--logging_steps", "100",
        "--save_steps", "100",
        "--eval_steps", "100",
        "--model_name_or_path", "ernie-3.0-tiny-nano-v2-zh",
        "--num_train_epochs", "30",
        "--per_device_eval_batch_size", "64",
        "--per_device_train_batch_size", "64",
        "--learning_rate", "5e-5",
        "--prune_embeddings",
        "--max_vocab_size", "6000",
        "--max_seq_length", "16",
        "--output_dir", output_dir,
        "--train_path", "data/train.txt",
        "--dev_path", "data/dev.txt",
        "--intent_label_path", "data/intent_label.txt",
        "--slot_label_path", "data/slot_label.txt",
        "--label_names", "intent_label", "slot_label",
        "--weight_decay", "0.01",
        "--warmup_ratio", "0.1",
        "--do_train",
        "--do_eval",
        "--do_export",
        "--input_dtype", "int32",
        "--disable_tqdm", "True",
        "--overwrite_output_dir",
        "--load_best_model_at_end", "True",
        "--save_total_limit", "1",
        "--metric_for_best_model", "eval_accuracy",
    ]
    subprocess.run(train_command, check=True)
    print("运行成功")
except (OSError, subprocess.CalledProcessError) as err:
    print("运行失败")
    print(err)

######################## Copy the output back to OBS (fixed boilerplate) ########################
# Copy the training output from the local runtime environment back to OBS; the
# OpenI platform then offers it for download from the corresponding training task.
# NOTE(review): only <cwd>/results.txt is uploaded here, not the output/
# directory written by the training step - confirm this is intended.
#
# The source path is computed before the try block so that the failure message
# in the except handler can always reference it.
train_dir = os.path.join(os.getcwd(), "results.txt")
try:
    mox.file.copy_parallel(train_dir, args.train_url)
    print("Successfully Upload {} to {}".format(train_dir,
                                                args.train_url))
except Exception as e:
    # Best-effort upload: report the failure instead of crashing the job.
    print('moxing upload {} to {} failed: '.format(train_dir,
                                                   args.train_url) + str(e))