diff --git a/models/UIE.ipynb b/models/UIE.ipynb index e0485a3..9567045 100644 --- a/models/UIE.ipynb +++ b/models/UIE.ipynb @@ -46,8 +46,11 @@ "# PaddleNLP 的 Python 依赖库在 requirements.txt 中给出,可通过如下命令安装:\n", "!pip install --upgrade -r requirements.txt -i https://mirror.baidu.com/pypi/simple\n", "\n", + "# 解决启智社区环境下的pip版本冲突\n", + "!pip install Pillow==9.0.1 requests==2.26.0 tqdm==4.62.3 typing-extensions==4.0.1 urllib3==1.26.2 click==8.0.3 colorama==0.4.4 tenacity==6.1.0 ipywidgets botocore==1.19.26 boto3==1.16.22 pydantic platformdirs -i https://mirror.baidu.com/pypi/simple\n", + "\n", "# 源码安装\n", - "%run setup.py install" + "!python setup.py install" ] }, { @@ -73,10 +76,14 @@ "metadata": {}, "outputs": [], "source": [ - "%cd model_zoo/uie && mkdir data && cd data\n", - "%wget https://bj.bcebos.com/paddlenlp/datasets/uie/doccano_ext.json && cd ..\n", + "%cd /home/ma-user/work/PaddleNLP/model_zoo/uie\n", + "%mkdir data\n", + "%cd data\n", + "\n", + "!wget https://bj.bcebos.com/paddlenlp/datasets/uie/doccano_ext.json", "\n", - "%run doccano.py --doccano_file ./data/doccano_ext.json --task_type ext --save_dir ./data --splits 0.8 0.2 0 --schema_lang ch" + "%cd ..\n", + "!python doccano.py --doccano_file ./data/doccano_ext.json --task_type ext --save_dir ./data --splits 0.8 0.2 0 --schema_lang ch" ] }, { @@ -100,35 +107,36 @@ "metadata": {}, "outputs": [], "source": [ + "%cd /home/ma-user/work/PaddleNLP/model_zoo/uie\n", "%env CUSTOM_DEVICE_BLACK_LIST=sigmoid\n", "%env FLAGS_selected_npu=0\n", "\n", - "%run finetune.py \\\n", - " --device npu \\\n", - " --logging_steps 10 \\\n", - " --save_steps 100 \\\n", - " --eval_steps 100 \\\n", - " --seed 42 \\\n", - " --model_name_or_path uie-base \\\n", - " --output_dir \"./checkpoint/model_best\" \\\n", - " --train_path data/train.txt \\\n", - " --dev_path data/dev.txt \\\n", - " --max_seq_length 512 \\\n", - " --per_device_eval_batch_size 16 \\\n", - " --per_device_train_batch_size 16 \\\n", - " --num_train_epochs 20 \\\n", - " --learning_rate 1e-5 \\\n", - " --label_names 'start_positions' 'end_positions' \\\n", - " --do_train \\\n", - " --do_eval \\\n", - " --do_export \\\n", - " --export_model_dir \"./checkpoint/model_best\" \\\n", - " --overwrite_output_dir \\\n", - " --disable_tqdm True \\\n", - " --metric_for_best_model eval_f1 \\\n", - " --load_best_model_at_end True \\\n", - " --save_total_limit 1 \\\n", - " --dynamic_max_length 16 32 64 128 256 512 " + "!python finetune.py \\\n", + " --device npu \\\n", + " --logging_steps 10 \\\n", + " --save_steps 100 \\\n", + " --eval_steps 100 \\\n", + " --seed 42 \\\n", + " --model_name_or_path uie-base \\\n", + " --output_dir \"./checkpoint/model_best\" \\\n", + " --train_path data/train.txt \\\n", + " --dev_path data/dev.txt \\\n", + " --max_seq_length 512 \\\n", + " --per_device_eval_batch_size 16 \\\n", + " --per_device_train_batch_size 16 \\\n", + " --num_train_epochs 20 \\\n", + " --learning_rate 1e-5 \\\n", + " --label_names 'start_positions' 'end_positions' \\\n", + " --do_train \\\n", + " --do_eval \\\n", + " --do_export \\\n", + " --export_model_dir \"./checkpoint/model_best\" \\\n", + " --overwrite_output_dir \\\n", + " --disable_tqdm True \\\n", + " --metric_for_best_model eval_f1 \\\n", + " --load_best_model_at_end True \\\n", + " --save_total_limit 1 \\\n", + " --dynamic_max_length 16 32 64 128 256 512 " ] }, { @@ -150,12 +158,14 @@ "metadata": {}, "outputs": [], "source": [ - "%run evaluate.py \\\n", - " --model_path ./checkpoint/model_best \\\n", - " --test_path ./data/dev.txt \\\n", - " --batch_size 16 \\\n", - " --device npu \\\n", - " --max_seq_len 512" + "%cd /home/ma-user/work/PaddleNLP/model_zoo/uie\n", + "\n", + "!python evaluate.py \\\n", + " --model_path ./checkpoint/model_best \\\n", + " --test_path ./data/dev.txt \\\n", + " --batch_size 16 \\\n", + " --device npu \\\n", + " --max_seq_len 512" ] }, { @@ -177,6 +187,8 @@ "metadata": {}, "outputs": [], "source": [ + "%cd /home/ma-user/work/PaddleNLP/model_zoo/uie\n", + "\n", "import os\n", "from pprint import pprint\n", "from paddlenlp import Taskflow\n",