diff --git a/Season1.step_into_chatgpt/7.Prompt/roberta_sequence_classification.ipynb b/Season1.step_into_chatgpt/7.Prompt/roberta_sequence_classification.ipynb index 6cf7043..72660e5 100644 --- a/Season1.step_into_chatgpt/7.Prompt/roberta_sequence_classification.ipynb +++ b/Season1.step_into_chatgpt/7.Prompt/roberta_sequence_classification.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "8dabe994-5618-4f4f-9955-c8dc74870076", + "id": "d0a8f35f-c4f4-4591-a095-adf38dae8ad7", "metadata": {}, "source": [ "# 基于MindNLP的Roberta模型Prompt Tuning" @@ -10,112 +10,35 @@ }, { "cell_type": "markdown", - "id": "ebb7edc0-7482-448f-9dfe-b12cc7e1cab4", + "id": "7d18d959-5256-4c72-b3e7-18c306af5f90", "metadata": {}, "source": [ - "该实验可进行在线体验,在线体验链接(https://pangu.huaweicloud.com/gallery/asset-detail.html?id=016991f8-0e0d-44c8-96f7-8b2cad54c592\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "7e9aa619-6a28-4929-9c6c-2d88db8a1053", - "metadata": {}, - "source": [ - "## 环境安装\n", - "\n", - "运行如下两个代码块,创建python-3.9.0 kernel。\n", - "\n", - "> 注意:\n", - "> \n", - ">此为在线运行平台配置python3.9的指南,如在其他环境平台运行案例,请根据实际情况修改如下代码\n", - "> \n", - "> 以下两个代码块仅能运行一次,多次运行会出现kernel报错。\n", - ">\n", - "> 如出现多次运行导致的kernel报错,请终止实例(点击右上角“停止NoteBook实例”的圆形图标),并重启实例。" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "3b0ef736-e7f1-46ac-866c-b6c76ef2c0d5", - "metadata": {}, - "outputs": [], - "source": [ - "%%capture captured_output\n", - "!/home/ma-user/anaconda3/bin/conda create -n python-3.9.0 python=3.9.0 -y --override-channels --channel https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main\n", - "!/home/ma-user/anaconda3/envs/python-3.9.0/bin/pip install ipykernel" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "6c55998d-bdfc-45b5-b2e4-fb8d603a3be9", - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import os\n", - "\n", - "data = {\n", - " \"display_name\": \"python-3.9.0\",\n", - " \"env\": {\n", - " \"PATH\": \"/home/ma-user/anaconda3/envs/python-3.9.0/bin:/home/ma-user/anaconda3/envs/python-3.7.10/bin:/modelarts/authoring/notebook-conda/bin:/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/home/ma-user/modelarts/ma-cli/bin:/home/ma-user/modelarts/ma-cli/bin\"\n", - " },\n", - " \"language\": \"python\",\n", - " \"argv\": [\n", - " \"/home/ma-user/anaconda3/envs/python-3.9.0/bin/python\",\n", - " \"-m\",\n", - " \"ipykernel\",\n", - " \"-f\",\n", - " \"{connection_file}\"\n", - " ]\n", - "}\n", + "## 环境配置\n", "\n", - "if not os.path.exists(\"/home/ma-user/anaconda3/share/jupyter/kernels/python-3.9.0/\"):\n", - " os.mkdir(\"/home/ma-user/anaconda3/share/jupyter/kernels/python-3.9.0/\")\n", + " python =3.9\n", + " mindspore = 2.3.1\n", + " mindnlp = 0.4.0\n", + " jieba\n", + " tiktoken\n", "\n", - "with open('/home/ma-user/anaconda3/share/jupyter/kernels/python-3.9.0/kernel.json', 'w') as f:\n", - " json.dump(data, f, indent=4)" + "**在线运行代码平台链接:**\n", + "- 1. [华为云AI Gallery](https://pangu.huaweicloud.com/gallery/asset-detail.html?id=016991f8-0e0d-44c8-96f7-8b2cad54c592)\n", + "- 2. [大模型平台AI实验室统一入口](https://xihe.mindspore.cn/projects)" ] }, { "cell_type": "markdown", - "id": "7b2ee72f-e907-4536-b295-cc2f1c64db3c", + "id": "32234a62-3149-47a8-aece-f5175f206bdc", "metadata": {}, "source": [ - "创建完成后,稍等片刻,或刷新页面。如下图所示,点击右上角(或左上角)kernel选择python-3.9.0。\n", + "## 模型与数据集加载\n", "\n", - "![change-kernel](https://mindspore-demo.obs.cn-north-4.myhuaweicloud.com/imgs/ai-gallery/change-kernel.PNG)" - ] - }, - { - "cell_type": "markdown", - "id": "b75eb5c6-11a6-43df-9067-fa15622cd517", - "metadata": {}, - "source": [ - "安装mindspore, mindnlp及其他依赖" + "本案例对roberta-large模型基于GLUE基准数据集进行prompt tuning。" ] }, { "cell_type": "code", "execution_count": 1, - "id": "b6bd85ef-29bf-40d5-8fef-00dfaab01a0d", - "metadata": {}, - "outputs": [], - "source": [ - "%%capture captured_output\n", - "\n", - "!pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/2.2.14/MindSpore/unified/x86_64/mindspore-2.2.14-cp39-cp39-linux_x86_64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple\n", - "!pip install mindnlp\n", - "!pip install ipywidgets\n", - "!pip install tqdm==4.65.0\n", - "!pip install scikit-learn" - ] - }, - { - "cell_type": "code", - "execution_count": 2, "id": "7228a58b-4f81-4f5d-ac6c-d9439b3f4447", "metadata": {}, "outputs": [ @@ -131,39 +54,50 @@ "%env HF_ENDPOINT=https://hf-mirror.com" ] }, - { - "cell_type": "markdown", - "id": "4cc505fa-31a4-4a07-8f9e-26bec54f4cdb", - "metadata": {}, - "source": [ - "## 模型与数据集加载\n", - "\n", - "本案例对roberta-large模型基于GLUE基准数据集进行prompt tuning。" - ] - }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "id": "9ff5004e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/lvyufeng/miniconda3/envs/mindspore/lib/python3.9/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for type is zero.\n", + " setattr(self, word, getattr(machar, word).flat[0])\n", + "/home/lvyufeng/miniconda3/envs/mindspore/lib/python3.9/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for type is zero.\n", + " return self._float_to_str(self.smallest_subnormal)\n", + "/home/lvyufeng/miniconda3/envs/mindspore/lib/python3.9/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for type is zero.\n", + " setattr(self, word, getattr(machar, word).flat[0])\n", + "/home/lvyufeng/miniconda3/envs/mindspore/lib/python3.9/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for type is zero.\n", + " return self._float_to_str(self.smallest_subnormal)\n", + "Building prefix dict from the default dictionary ...\n", + "Loading model from cache /tmp/jieba.cache\n", + "Loading model cost 0.931 seconds.\n", + "Prefix dict has been built successfully.\n", + "/home/lvyufeng/miniconda3/envs/mindspore/lib/python3.9/site-packages/Cython/Compiler/Main.py:381: FutureWarning: Cython directive 'language_level' not set, using '3str' for now (Py3). This has changed from earlier releases! File: /home/lvyufeng/miniconda3/envs/mindspore/lib/python3.9/site-packages/mindnlp/transformers/models/graphormer/algos_graphormer.pyx\n", + " tree = Parsing.p_module(s, pxd, full_module_name)\n", + "In file included from /home/lvyufeng/miniconda3/envs/mindspore/lib/python3.9/site-packages/numpy/core/include/numpy/ndarraytypes.h:1929,\n", + " from /home/lvyufeng/miniconda3/envs/mindspore/lib/python3.9/site-packages/numpy/core/include/numpy/ndarrayobject.h:12,\n", + " from /home/lvyufeng/miniconda3/envs/mindspore/lib/python3.9/site-packages/numpy/core/include/numpy/arrayobject.h:5,\n", + " from /home/lvyufeng/.pyxbld/temp.linux-aarch64-cpython-39/home/lvyufeng/miniconda3/envs/mindspore/lib/python3.9/site-packages/mindnlp/transformers/models/graphormer/algos_graphormer.c:1240:\n", + "/home/lvyufeng/miniconda3/envs/mindspore/lib/python3.9/site-packages/numpy/core/include/numpy/npy_1_7_deprecated_api.h:17:2: warning: #warning \"Using deprecated NumPy API, disable it with \" \"#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION\" [-Wcpp]\n", + " 17 | #warning \"Using deprecated NumPy API, disable it with \" \\\n", + " | ^~~~~~~\n" + ] + } + ], "source": [ - "import argparse\n", - "import os\n", - "\n", "import mindspore\n", - "from mindspore.experimental.optim import AdamW\n", "from tqdm import tqdm\n", - "import evaluate\n", + "from mindnlp import evaluate\n", "from mindnlp.dataset import load_dataset\n", - "from mindnlp.engine import set_seed\n", "from mindnlp.transformers import AutoModelForSequenceClassification, AutoTokenizer\n", - "from mindnlp.modules.optimization import get_linear_schedule_with_warmup\n", + "from mindnlp.core.optim import AdamW\n", + "from mindnlp.common.optimization import get_linear_schedule_with_warmup\n", "from mindnlp.peft import (\n", - " get_peft_config,\n", " get_peft_model,\n", - " get_peft_model_state_dict,\n", - " set_peft_model_state_dict,\n", " PeftType,\n", " PromptTuningConfig,\n", ")" @@ -171,22 +105,21 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "id": "e32c4a9e", "metadata": {}, "outputs": [], "source": [ "batch_size = 32\n", - "model_name_or_path = \"AI-ModelScope/roberta-large\"\n", + "model_name_or_path = \"roberta-large\"\n", "task = \"mrpc\"\n", "peft_type = PeftType.PROMPT_TUNING\n", - "# num_epochs = 20\n", - "num_epochs = 5" + "num_epochs = 20" ] }, { "cell_type": "markdown", - "id": "5dcf6f6e-2d5e-4342-b264-67c7419374d4", + "id": "44950094-babf-4834-be57-e70a032754a8", "metadata": {}, "source": [ "prompt tuning配置,任务类型选为\"SEQ_CLS\", 即序列分类。" @@ -194,20 +127,18 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "id": "622fe9c8", "metadata": {}, "outputs": [], "source": [ - "# peft config\n", "peft_config = PromptTuningConfig(task_type=\"SEQ_CLS\", num_virtual_tokens=10)\n", - "# learning rate\n", "lr = 1e-3" ] }, { "cell_type": "markdown", - "id": "2474cad0-3b50-4ba1-aea4-2787955f41c7", + "id": "b4491967-c060-4064-8b43-30d9728cfb03", "metadata": {}, "source": [ "加载tokenizer。如模型为GPT、OPT或BLOOM类模型,从序列左侧添加padding,其他情况下从序列右侧添加padding。" @@ -215,183 +146,36 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "id": "74e9efe0", "metadata": {}, "outputs": [ { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "c6f49682e9da4a26afdf8f040f76f1e0", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0.00/482 [00:00