|
@@ -15,15 +15,15 @@ |
|
|
"\n", |
|
|
"\n", |
|
|
"    1.3   trainer 内部初始化 evaluater\n", |
|
|
"    1.3   trainer 内部初始化 evaluater\n", |
|
|
"\n", |
|
|
"\n", |
|
|
"  2   使用 trainer 训练模型\n", |
|
|
|
|
|
|
|
|
"  2   使用 fastNLP 0.8 搭建 argmax 模型\n", |
|
|
"\n", |
|
|
"\n", |
|
|
"    2.1   argmax 模型实例\n", |
|
|
|
|
|
|
|
|
"    2.1   trainer_step 和 evaluator_step\n", |
|
|
"\n", |
|
|
"\n", |
|
|
"    2.2   trainer 的参数匹配\n", |
|
|
|
|
|
|
|
|
"    2.2   trainer 和 evaluator 的参数匹配\n", |
|
|
"\n", |
|
|
"\n", |
|
|
"    2.3   trainer 的实际使用 \n", |
|
|
|
|
|
|
|
|
"    2.3   一个实际案例:argmax 模型\n", |
|
|
"\n", |
|
|
"\n", |
|
|
"  3   使用 evaluator 评测模型\n", |
|
|
|
|
|
|
|
|
"  3   使用 fastNLP 0.8 训练 argmax 模型\n", |
|
|
" \n", |
|
|
" \n", |
|
|
"    3.1   trainer 外部初始化的 evaluator\n", |
|
|
"    3.1   trainer 外部初始化的 evaluator\n", |
|
|
"\n", |
|
|
"\n", |
|
@@ -50,21 +50,21 @@ |
|
|
"\n", |
|
|
"\n", |
|
|
"```python\n", |
|
|
"```python\n", |
|
|
"trainer = Trainer(\n", |
|
|
"trainer = Trainer(\n", |
|
|
" model=model,\n", |
|
|
|
|
|
" train_dataloader=train_dataloader,\n", |
|
|
|
|
|
" optimizers=optimizer,\n", |
|
|
|
|
|
|
|
|
" model=model, # 模型基于 torch.nn.Module\n", |
|
|
|
|
|
" train_dataloader=train_dataloader, # 加载模块基于 torch.utils.data.DataLoader \n", |
|
|
|
|
|
" optimizers=optimizer, # 优化模块基于 torch.optim.*\n", |
|
|
"\t...\n", |
|
|
"\t...\n", |
|
|
"\tdriver=\"torch\",\n", |
|
|
|
|
|
"\tdevice=0,\n", |
|
|
|
|
|
|
|
|
"\tdriver=\"torch\", # 使用 pytorch 模块进行训练 \n", |
|
|
|
|
|
"\tdevice='cuda', # 使用 GPU 显卡执行训练\n", |
|
|
"\t...\n", |
|
|
"\t...\n", |
|
|
")\n", |
|
|
")\n", |
|
|
"...\n", |
|
|
"...\n", |
|
|
"evaluator = Evaluator(\n", |
|
|
"evaluator = Evaluator(\n", |
|
|
" model=model,\n", |
|
|
|
|
|
" dataloaders=evaluate_dataloader,\n", |
|
|
|
|
|
" metrics={'acc': Accuracy()} \n", |
|
|
|
|
|
|
|
|
" model=model, # 模型基于 torch.nn.Module\n", |
|
|
|
|
|
" dataloaders=evaluate_dataloader, # 加载模块基于 torch.utils.data.DataLoader\n", |
|
|
|
|
|
" metrics={'acc': Accuracy()}, # 测评方法使用 fastNLP.core.metrics.Accuracy \n", |
|
|
" ...\n", |
|
|
" ...\n", |
|
|
" driver=trainer.driver,\n", |
|
|
|
|
|
|
|
|
" driver=trainer.driver, # 保持同 trainer 的 driver 一致\n", |
|
|
"\tdevice=None,\n", |
|
|
"\tdevice=None,\n", |
|
|
" ...\n", |
|
|
" ...\n", |
|
|
")\n", |
|
|
")\n", |
|
@@ -88,7 +88,7 @@ |
|
|
"\n", |
|
|
"\n", |
|
|
"注:在同一脚本中,`Trainer`和`Evaluator`使用的`driver`应当保持一致\n", |
|
|
"注:在同一脚本中,`Trainer`和`Evaluator`使用的`driver`应当保持一致\n", |
|
|
"\n", |
|
|
"\n", |
|
|
"  一个不能违背的原则在于:**不要将多卡的`driver`前使用单卡的`driver`**(???),这样使用可能会带来很多意想不到的错误。" |
|
|
|
|
|
|
|
|
"  一个不能违背的原则在于:**不要将多卡的`driver`前使用单卡的`driver`**(???),这样使用可能会带来很多意想不到的错误" |
|
|
] |
|
|
] |
|
|
}, |
|
|
}, |
|
|
{ |
|
|
{ |
|
@@ -109,10 +109,10 @@ |
|
|
" optimizers=optimizer,\n", |
|
|
" optimizers=optimizer,\n", |
|
|
"\t...\n", |
|
|
"\t...\n", |
|
|
"\tdriver=\"torch\",\n", |
|
|
"\tdriver=\"torch\",\n", |
|
|
"\tdevice=0,\n", |
|
|
|
|
|
|
|
|
"\tdevice='cuda',\n", |
|
|
"\t...\n", |
|
|
"\t...\n", |
|
|
" evaluate_dataloaders=evaluate_dataloader,\n", |
|
|
|
|
|
" metrics={'acc': Accuracy()},\n", |
|
|
|
|
|
|
|
|
" evaluate_dataloaders=evaluate_dataloader, # 传入参数 evaluate_dataloaders\n", |
|
|
|
|
|
" metrics={'acc': Accuracy()}, # 传入参数 metrics\n", |
|
|
"\t...\n", |
|
|
"\t...\n", |
|
|
")\n", |
|
|
")\n", |
|
|
"```" |
|
|
"```" |
|
@@ -123,7 +123,7 @@ |
|
|
"id": "0c9c7dda", |
|
|
"id": "0c9c7dda", |
|
|
"metadata": {}, |
|
|
"metadata": {}, |
|
|
"source": [ |
|
|
"source": [ |
|
|
"## 2. 使用 trainer 训练模型" |
|
|
|
|
|
|
|
|
"## 2. argmax 模型的搭建实例" |
|
|
] |
|
|
] |
|
|
}, |
|
|
}, |
|
|
{ |
|
|
{ |
|
@@ -131,71 +131,41 @@ |
|
|
"id": "524ac200", |
|
|
"id": "524ac200", |
|
|
"metadata": {}, |
|
|
"metadata": {}, |
|
|
"source": [ |
|
|
"source": [ |
|
|
"### 2.1 argmax 模型实例\n", |
|
|
|
|
|
|
|
|
"### 2.1 trainer_step 和 evaluator_step\n", |
|
|
"\n", |
|
|
"\n", |
|
|
"本节将通过训练`argmax`模型,简单介绍如何使用`Trainer`模块\n", |
|
|
|
|
|
|
|
|
"在`fastNLP 0.8`中,使用`pytorch.nn.Module`搭建需要训练的模型,在搭建模型过程中,除了\n", |
|
|
"\n", |
|
|
"\n", |
|
|
"  使用`pytorch`定义`argmax`模型,输入一组固定维度的向量,输出其中数值最大的数的索引\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"  除了添加`pytorch`要求的`forward`方法外,还需要添加 **`train_step`** 和 **`evaluate_step`** 这两个方法" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"id": "5314482b", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"pycharm": { |
|
|
|
|
|
"is_executing": true |
|
|
|
|
|
} |
|
|
|
|
|
}, |
|
|
|
|
|
"outputs": [], |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"import torch\n", |
|
|
|
|
|
"import torch.nn as nn\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"class ArgMaxModel(nn.Module):\n", |
|
|
|
|
|
" def __init__(self, num_labels, feature_dimension):\n", |
|
|
|
|
|
" super(ArgMaxModel, self).__init__()\n", |
|
|
|
|
|
" self.num_labels = num_labels\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
" self.linear1 = nn.Linear(in_features=feature_dimension, out_features=10)\n", |
|
|
|
|
|
" self.ac1 = nn.ReLU()\n", |
|
|
|
|
|
" self.linear2 = nn.Linear(in_features=10, out_features=10)\n", |
|
|
|
|
|
" self.ac2 = nn.ReLU()\n", |
|
|
|
|
|
" self.output = nn.Linear(in_features=10, out_features=num_labels)\n", |
|
|
|
|
|
" self.loss_fn = nn.CrossEntropyLoss()\n", |
|
|
|
|
|
|
|
|
"  添加`pytorch`要求的`forward`方法外,还需要添加 **`train_step`** 和 **`evaluate_step`** 这两个方法\n", |
|
|
|
|
|
"***\n", |
|
|
|
|
|
"```python\n", |
|
|
|
|
|
"class Model(torch.nn.Module):\n", |
|
|
|
|
|
" def __init__(self):\n", |
|
|
|
|
|
" super(Model, self).__init__()\n", |
|
|
|
|
|
" self.loss_fn = torch.nn.CrossEntropyLoss()\n", |
|
|
|
|
|
" pass\n", |
|
|
"\n", |
|
|
"\n", |
|
|
" def forward(self, x):\n", |
|
|
" def forward(self, x):\n", |
|
|
" x = self.ac1(self.linear1(x))\n", |
|
|
|
|
|
" x = self.ac2(self.linear2(x))\n", |
|
|
|
|
|
" x = self.output(x)\n", |
|
|
|
|
|
" return x\n", |
|
|
|
|
|
|
|
|
" pass\n", |
|
|
"\n", |
|
|
"\n", |
|
|
" def train_step(self, x, y):\n", |
|
|
" def train_step(self, x, y):\n", |
|
|
" x = self(x)\n", |
|
|
|
|
|
" return {\"loss\": self.loss_fn(x, y)}\n", |
|
|
|
|
|
|
|
|
" pred = self(x)\n", |
|
|
|
|
|
" return {\"loss\": self.loss_fn(pred, y)}\n", |
|
|
"\n", |
|
|
"\n", |
|
|
" def evaluate_step(self, x, y):\n", |
|
|
" def evaluate_step(self, x, y):\n", |
|
|
" x = self(x)\n", |
|
|
|
|
|
" x = torch.max(x, dim=-1)[1]\n", |
|
|
|
|
|
" return {\"pred\": x, \"target\": y}" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "markdown", |
|
|
|
|
|
"id": "ca897322", |
|
|
|
|
|
"metadata": {}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
|
|
|
" pred = self(x)\n", |
|
|
|
|
|
" pred = torch.max(pred, dim=-1)[1]\n", |
|
|
|
|
|
" return {\"pred\": pred, \"target\": y}\n", |
|
|
|
|
|
"```\n", |
|
|
|
|
|
"***\n", |
|
|
"在`fastNLP 0.8`中,**函数`train_step`是`Trainer`中参数`train_fn`的默认值**\n", |
|
|
"在`fastNLP 0.8`中,**函数`train_step`是`Trainer`中参数`train_fn`的默认值**\n", |
|
|
"\n", |
|
|
"\n", |
|
|
"  由于,在`Trainer`训练时,**`Trainer`通过参数`_train_fn_`对应的模型方法获得当前数据批次的损失值**\n", |
|
|
|
|
|
|
|
|
"  由于,在`Trainer`训练时,**`Trainer`通过参数`train_fn`对应的模型方法获得当前数据批次的损失值**\n", |
|
|
"\n", |
|
|
"\n", |
|
|
"  因此,在`Trainer`训练时,`Trainer`首先会寻找模型是否定义了`train_step`这一方法\n", |
|
|
"  因此,在`Trainer`训练时,`Trainer`首先会寻找模型是否定义了`train_step`这一方法\n", |
|
|
"\n", |
|
|
"\n", |
|
|
"    如果没有找到,那么`Trainer`会默认使用模型的`forward`函数来进行训练的前向传播过程\n", |
|
|
"    如果没有找到,那么`Trainer`会默认使用模型的`forward`函数来进行训练的前向传播过程\n", |
|
|
"\n", |
|
|
"\n", |
|
|
"注:在`fastNLP 0.8`中,`Trainer`要求模型通过`train_step`来返回一个字典,将损失值作为`loss`的键值\n", |
|
|
|
|
|
|
|
|
"注:在`fastNLP 0.8`中,**`Trainer`要求模型通过`train_step`来返回一个字典**,**满足如`{\"loss\": loss}`的形式**\n", |
|
|
"\n", |
|
|
"\n", |
|
|
"  此外,这里也可以通过传入`Trainer`的参数`output_mapping`来实现高度化的定制,具体请见这一note(???)\n", |
|
|
"  此外,这里也可以通过传入`Trainer`的参数`output_mapping`来实现高度化的定制,具体请见这一note(???)\n", |
|
|
"\n", |
|
|
"\n", |
|
@@ -205,7 +175,11 @@ |
|
|
"\n", |
|
|
"\n", |
|
|
"  从用户角度,模型通过`evaluate_step`方法来返回一个字典,内容与传入`Evaluator`的`metrics`一致\n", |
|
|
"  从用户角度,模型通过`evaluate_step`方法来返回一个字典,内容与传入`Evaluator`的`metrics`一致\n", |
|
|
"\n", |
|
|
"\n", |
|
|
"<!--   从模块角度,`fastNLP 0.8`会匹配该字典的键值和一个`metric`的更新函数的函数签名,自动地将`metric`所需要的内容传给该`metric`,也就是我们会自动进行“**参数匹配**”。 -->" |
|
|
|
|
|
|
|
|
"  从模块角度,该字典的键值和`metric`中的`update`函数的签名一致,这样的机制在传参时被称为“**参数匹配**”\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"***\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"![fastNLP 0.8 中,Trainer 和 Evaluator 的关系图](./figures/T0-fig-trainer-and-evaluator.png)" |
|
|
] |
|
|
] |
|
|
}, |
|
|
}, |
|
|
{ |
|
|
{ |
|
@@ -213,13 +187,52 @@ |
|
|
"id": "fb3272eb", |
|
|
"id": "fb3272eb", |
|
|
"metadata": {}, |
|
|
"metadata": {}, |
|
|
"source": [ |
|
|
"source": [ |
|
|
"### 2.2 trainer 的参数匹配\n", |
|
|
|
|
|
|
|
|
"### 2.2 trainer 和 evaluator 的参数匹配\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"在`fastNLP 0.8`中,参数匹配涉及到两个方面,分别是在\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"  一方面,**在模型的前向传播中**,**`dataloader`向`train_step`或`evaluate_step`函数传递`batch`**\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"  另方面,**在模型的评测过程中**,**`evaluate_dataloader`向`metric`的`update`函数传递`batch`**\n", |
|
|
"\n", |
|
|
"\n", |
|
|
"`fastNLP 0.8`中的参数匹配涉及到两个方面,一是在模型训练或者评测的前向传播过程中,如果从`dataloader`中出来一个`batch`的数据是一个字典,那么我们会查看模型的`train_step`和`evaluate_step`方法的参数签名,然后对于每一个参数,我们会根据其名字从 batch 这一字典中选择出对应的数据传入进去。例如在接下来的定义`Dataset`的部分,注意`ArgMaxDatset`的`__getitem__`方法,您可以通过在`Trainer`和`Evaluator`中设置参数 `model_wo_auto_param_call`来关闭这一行为。当您关闭了这一行为后,我们会将`batch`直接传给您的`train_step`、`evaluate_step`或者 `forward`函数。\n", |
|
|
|
|
|
|
|
|
"对于前者,在`Trainer`和`Evaluator`中的参数`model_wo_auto_param_call`被设置为`False`时\n", |
|
|
"\n", |
|
|
"\n", |
|
|
"二是在传入`Trainer`或者`Evaluator metrics`后,我们会在需要评测的时间点主动调用`metrics`来对`evaluate_dataloaders`进行评测,这一功能主要就是通过对`metrics`的`update`方法和一个`batch`的数据进行参数评测实现的。首先需要明确的是一个 metric 的计算通常分为 `update` 和 `get_metric`两步,其中`update`表示更新一个`batch`的评测数据,`get_metric` 表示根据已经得到的评测数据计算出最终的评测值,例如对于 `Accuracy`来说,其在`update`的时候会更新一个`batch`计算正确的数量 right_num 和计算错误的数量 total_num,最终在 `get_metric` 时返回评测值`right_num / total_num`。\n", |
|
|
|
|
|
|
|
|
"    **`fastNLP 0.8`要求`dataloader`生成的每个`batch`**,**满足如`{\"x\": x, \"y\": y}`的形式**\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"  同时,`fastNLP 0.8`会查看模型的`train_step`和`evaluate_step`方法的参数签名,并为对应参数传入对应数值\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"    **字典形式的定义**,**对应在`Dataset`定义的`__getitem__`方法中**,例如下方的`ArgMaxDatset`\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"  而在`Trainer`和`Evaluator`中的参数`model_wo_auto_param_call`被设置为`True`时\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"    `fastNLP 0.8`会将`batch`直接传给模型的`train_step`、`evaluate_step`或`forward`函数\n", |
|
|
|
|
|
"***\n", |
|
|
|
|
|
"```python\n", |
|
|
|
|
|
"class Dataset(torch.utils.data.Dataset):\n", |
|
|
|
|
|
" def __init__(self, x, y):\n", |
|
|
|
|
|
" self.x = x\n", |
|
|
|
|
|
" self.y = y\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
" def __len__(self):\n", |
|
|
|
|
|
" return len(self.x)\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
" def __getitem__(self, item):\n", |
|
|
|
|
|
" return {\"x\": self.x[item], \"y\": self.y[item]}\n", |
|
|
|
|
|
"```\n", |
|
|
|
|
|
"***\n", |
|
|
|
|
|
"对于后者,首先要明确,在`Trainer`和`Evaluator`中,`metrics`的计算分为`update`和`get_metric`两步\n", |
|
|
"\n", |
|
|
"\n", |
|
|
"因为`fastNLP 0.8`的`metrics`是自动计算的(只需要传给`Trainer`或者`Evaluator`),因此其一定依赖于参数匹配。对于从`evaluate_dataloader`中生成的一个`batch`的数据,我们会查看传给 `Trainer`(最终是传给`Evaluator`)和`Evaluator`的每一个`metric`,然后查看其`update`函数的函数签名,然后根据每一个参数的名字从`batch`字典中选择出对应的数据传入进去。" |
|
|
|
|
|
|
|
|
"    **`update`函数**,**针对一个`batch`的预测结果**,计算其累计的评价指标\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"    **`get_metric`函数**,**统计`update`函数累计的评价指标**,来计算最终的评价结果\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"  例如对于`Accuracy`来说,`update`函数会更新一个`batch`的正例数量`right_num`和负例数量`total_num`\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"    而`get_metric`函数则会返回所有`batch`的评测值`right_num / total_num`\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"  在此基础上,**`fastNLP 0.8`要求`evaluate_dataloader`生成的每个`batch`传递给对应的`metric`**\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"    **以`{\"pred\": y_pred, \"target\": y_true}`的形式**,对应其`update`函数的函数签名" |
|
|
] |
|
|
] |
|
|
}, |
|
|
}, |
|
|
{ |
|
|
{ |
|
@@ -227,9 +240,65 @@ |
|
|
"id": "f62b7bb1", |
|
|
"id": "f62b7bb1", |
|
|
"metadata": {}, |
|
|
"metadata": {}, |
|
|
"source": [ |
|
|
"source": [ |
|
|
"### 2.3 trainer的实际使用\n", |
|
|
|
|
|
|
|
|
"### 2.3 一个实际案例:argmax 模型\n", |
|
|
"\n", |
|
|
"\n", |
|
|
"接下来我们创建用于训练的 dataset,其接受三个参数:数据维度、数据量和随机数种子,生成指定数量的维度为 `feature_dimension` 向量,而每一个向量的标签就是该向量中最大值的索引。" |
|
|
|
|
|
|
|
|
"下文将通过训练`argmax`模型,简单介绍如何使用`Trainer`模块\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"  首先,使用`pytorch.nn.Module`定义`argmax`模型,目标是输入一组固定维度的向量,输出其中数值最大的数的索引" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"execution_count": 1, |
|
|
|
|
|
"id": "5314482b", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"pycharm": { |
|
|
|
|
|
"is_executing": false |
|
|
|
|
|
} |
|
|
|
|
|
}, |
|
|
|
|
|
"outputs": [], |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"import torch\n", |
|
|
|
|
|
"import torch.nn as nn\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"class ArgMaxModel(nn.Module):\n", |
|
|
|
|
|
" def __init__(self, num_labels, feature_dimension):\n", |
|
|
|
|
|
" super(ArgMaxModel, self).__init__()\n", |
|
|
|
|
|
" self.num_labels = num_labels\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
" self.linear1 = nn.Linear(in_features=feature_dimension, out_features=10)\n", |
|
|
|
|
|
" self.ac1 = nn.ReLU()\n", |
|
|
|
|
|
" self.linear2 = nn.Linear(in_features=10, out_features=10)\n", |
|
|
|
|
|
" self.ac2 = nn.ReLU()\n", |
|
|
|
|
|
" self.output = nn.Linear(in_features=10, out_features=num_labels)\n", |
|
|
|
|
|
" self.loss_fn = nn.CrossEntropyLoss()\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
" def forward(self, x):\n", |
|
|
|
|
|
" pred = self.ac1(self.linear1(x))\n", |
|
|
|
|
|
" pred = self.ac2(self.linear2(pred))\n", |
|
|
|
|
|
" pred = self.output(pred)\n", |
|
|
|
|
|
" return pred\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
" def train_step(self, x, y):\n", |
|
|
|
|
|
" pred = self(x)\n", |
|
|
|
|
|
" return {\"loss\": self.loss_fn(pred, y)}\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
" def evaluate_step(self, x, y):\n", |
|
|
|
|
|
" pred = self(x)\n", |
|
|
|
|
|
" pred = torch.max(pred, dim=-1)[1]\n", |
|
|
|
|
|
" return {\"pred\": pred, \"target\": y}" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "markdown", |
|
|
|
|
|
"id": "71f3fa6b", |
|
|
|
|
|
"metadata": {}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"  接着,使用`torch.utils.data.Dataset`定义`ArgMaxDataset`数据集\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"    数据集包含三个参数:维度`feature_dimension`、数据量`data_num`和随机种子`seed`\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"    数据及初始化是,自动生成指定维度的向量,并为每个向量标注出其中最大值的索引作为预测标签" |
|
|
] |
|
|
] |
|
|
}, |
|
|
}, |
|
|
{ |
|
|
{ |
|
@@ -245,7 +314,7 @@ |
|
|
"source": [ |
|
|
"source": [ |
|
|
"from torch.utils.data import Dataset\n", |
|
|
"from torch.utils.data import Dataset\n", |
|
|
"\n", |
|
|
"\n", |
|
|
"class ArgMaxDatset(Dataset):\n", |
|
|
|
|
|
|
|
|
"class ArgMaxDataset(Dataset):\n", |
|
|
" def __init__(self, feature_dimension, data_num=1000, seed=0):\n", |
|
|
" def __init__(self, feature_dimension, data_num=1000, seed=0):\n", |
|
|
" self.num_labels = feature_dimension\n", |
|
|
" self.num_labels = feature_dimension\n", |
|
|
" self.feature_dimension = feature_dimension\n", |
|
|
" self.feature_dimension = feature_dimension\n", |
|
@@ -269,7 +338,9 @@ |
|
|
"id": "2cb96332", |
|
|
"id": "2cb96332", |
|
|
"metadata": {}, |
|
|
"metadata": {}, |
|
|
"source": [ |
|
|
"source": [ |
|
|
"现在准备好数据和模型。" |
|
|
|
|
|
|
|
|
"  然后,根据`ArgMaxModel`类初始化模型实例,保持输入维度`feature_dimension`和输出标签数量`num_labels`一致\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"    再根据`ArgMaxDataset`类初始化两个数据集实例,分别用来模型测试和模型评测,数据量各1000笔" |
|
|
] |
|
|
] |
|
|
}, |
|
|
}, |
|
|
{ |
|
|
{ |
|
@@ -283,16 +354,10 @@ |
|
|
}, |
|
|
}, |
|
|
"outputs": [], |
|
|
"outputs": [], |
|
|
"source": [ |
|
|
"source": [ |
|
|
"from torch.utils.data import DataLoader\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"train_dataset = ArgMaxDatset(feature_dimension=10, data_num=1000)\n", |
|
|
|
|
|
"evaluate_dataset = ArgMaxDatset(feature_dimension=10, data_num=100)\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)\n", |
|
|
|
|
|
"evaluate_dataloader = DataLoader(evaluate_dataset, batch_size=8)\n", |
|
|
|
|
|
|
|
|
"model = ArgMaxModel(num_labels=10, feature_dimension=10)\n", |
|
|
"\n", |
|
|
"\n", |
|
|
"# num_labels 设置为 10,与 feature_dimension 保持一致,因为我们是预测十个位置中哪一个的概率最大。\n", |
|
|
|
|
|
"model = ArgMaxModel(num_labels=10, feature_dimension=10)" |
|
|
|
|
|
|
|
|
"train_dataset = ArgMaxDataset(feature_dimension=10, data_num=1000)\n", |
|
|
|
|
|
"evaluate_dataset = ArgMaxDataset(feature_dimension=10, data_num=100)" |
|
|
] |
|
|
] |
|
|
}, |
|
|
}, |
|
|
{ |
|
|
{ |
|
@@ -300,12 +365,33 @@ |
|
|
"id": "4e7d25ee", |
|
|
"id": "4e7d25ee", |
|
|
"metadata": {}, |
|
|
"metadata": {}, |
|
|
"source": [ |
|
|
"source": [ |
|
|
"将优化器也定义好。" |
|
|
|
|
|
|
|
|
"  此外,使用`torch.utils.data.DataLoader`初始化两个数据加载模块,批量大小同为8,分别用于训练和测评" |
|
|
] |
|
|
] |
|
|
}, |
|
|
}, |
|
|
{ |
|
|
{ |
|
|
"cell_type": "code", |
|
|
"cell_type": "code", |
|
|
"execution_count": 4, |
|
|
"execution_count": 4, |
|
|
|
|
|
"id": "363b5b09", |
|
|
|
|
|
"metadata": {}, |
|
|
|
|
|
"outputs": [], |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"from torch.utils.data import DataLoader\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)\n", |
|
|
|
|
|
"evaluate_dataloader = DataLoader(evaluate_dataset, batch_size=8)" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "markdown", |
|
|
|
|
|
"id": "c8d4443f", |
|
|
|
|
|
"metadata": {}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"  最后,使用`torch.optim.SGD`初始化一个优化模块,基于随机梯度下降法" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"execution_count": 5, |
|
|
"id": "dc28a2d9", |
|
|
"id": "dc28a2d9", |
|
|
"metadata": { |
|
|
"metadata": { |
|
|
"pycharm": { |
|
|
"pycharm": { |
|
@@ -321,15 +407,33 @@ |
|
|
}, |
|
|
}, |
|
|
{ |
|
|
{ |
|
|
"cell_type": "markdown", |
|
|
"cell_type": "markdown", |
|
|
"id": "4f1fba81", |
|
|
|
|
|
|
|
|
"id": "eb8ca6cf", |
|
|
|
|
|
"metadata": {}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"## 3. 使用 fastNLP 0.8 训练 argmax 模型\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"### 3.1 trainer 外部初始化的 evaluator" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "markdown", |
|
|
|
|
|
"id": "55145553", |
|
|
"metadata": {}, |
|
|
"metadata": {}, |
|
|
"source": [ |
|
|
"source": [ |
|
|
"现在万事俱备,开始使用 Trainer 进行训练!" |
|
|
|
|
|
|
|
|
"通过从`fastNLP`库中导入`Trainer`类,初始化`trainer`实例,对模型进行训练\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"  需要导入预先定义好的模型`model`、对应的数据加载模块`train_dataloader`、优化模块`optimizer`\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"  通过`progress_bar`设定进度条格式,默认为`\"auto\"`,此外还有`\"rich\"`、`\"raw\"`和`None`\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"    但对于`\"auto\"`和`\"rich\"`格式,训练结束后进度条会不显示(???)\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"  通过`n_epochs`设定优化迭代轮数,默认为20;全部`Trainer`的全部变量与函数可以通过`dir(trainer)`查询" |
|
|
] |
|
|
] |
|
|
}, |
|
|
}, |
|
|
{ |
|
|
{ |
|
|
"cell_type": "code", |
|
|
"cell_type": "code", |
|
|
"execution_count": 5, |
|
|
|
|
|
|
|
|
"execution_count": 6, |
|
|
"id": "b51b7a2d", |
|
|
"id": "b51b7a2d", |
|
|
"metadata": { |
|
|
"metadata": { |
|
|
"pycharm": { |
|
|
"pycharm": { |
|
@@ -349,167 +453,20 @@ |
|
|
}, |
|
|
}, |
|
|
"metadata": {}, |
|
|
"metadata": {}, |
|
|
"output_type": "display_data" |
|
|
"output_type": "display_data" |
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"data": { |
|
|
|
|
|
"text/plain": [ |
|
|
|
|
|
"['__annotations__',\n", |
|
|
|
|
|
" '__class__',\n", |
|
|
|
|
|
" '__delattr__',\n", |
|
|
|
|
|
" '__dict__',\n", |
|
|
|
|
|
" '__dir__',\n", |
|
|
|
|
|
" '__doc__',\n", |
|
|
|
|
|
" '__eq__',\n", |
|
|
|
|
|
" '__format__',\n", |
|
|
|
|
|
" '__ge__',\n", |
|
|
|
|
|
" '__getattribute__',\n", |
|
|
|
|
|
" '__gt__',\n", |
|
|
|
|
|
" '__hash__',\n", |
|
|
|
|
|
" '__init__',\n", |
|
|
|
|
|
" '__init_subclass__',\n", |
|
|
|
|
|
" '__le__',\n", |
|
|
|
|
|
" '__lt__',\n", |
|
|
|
|
|
" '__module__',\n", |
|
|
|
|
|
" '__ne__',\n", |
|
|
|
|
|
" '__new__',\n", |
|
|
|
|
|
" '__reduce__',\n", |
|
|
|
|
|
" '__reduce_ex__',\n", |
|
|
|
|
|
" '__repr__',\n", |
|
|
|
|
|
" '__setattr__',\n", |
|
|
|
|
|
" '__sizeof__',\n", |
|
|
|
|
|
" '__str__',\n", |
|
|
|
|
|
" '__subclasshook__',\n", |
|
|
|
|
|
" '__weakref__',\n", |
|
|
|
|
|
" '_check_callback_called_legality',\n", |
|
|
|
|
|
" '_check_train_batch_loop_legality',\n", |
|
|
|
|
|
" '_custom_callbacks',\n", |
|
|
|
|
|
" '_driver',\n", |
|
|
|
|
|
" '_evaluate_dataloaders',\n", |
|
|
|
|
|
" '_fetch_matched_fn_callbacks',\n", |
|
|
|
|
|
" '_set_num_eval_batch_per_dl',\n", |
|
|
|
|
|
" '_train_batch_loop',\n", |
|
|
|
|
|
" '_train_dataloader',\n", |
|
|
|
|
|
" '_train_step',\n", |
|
|
|
|
|
" '_train_step_signature_fn',\n", |
|
|
|
|
|
" 'accumulation_steps',\n", |
|
|
|
|
|
" 'add_callback_fn',\n", |
|
|
|
|
|
" 'backward',\n", |
|
|
|
|
|
" 'batch_idx_in_epoch',\n", |
|
|
|
|
|
" 'batch_step_fn',\n", |
|
|
|
|
|
" 'callback_manager',\n", |
|
|
|
|
|
" 'check_batch_step_fn',\n", |
|
|
|
|
|
" 'cur_epoch_idx',\n", |
|
|
|
|
|
" 'data_device',\n", |
|
|
|
|
|
" 'dataloader',\n", |
|
|
|
|
|
" 'device',\n", |
|
|
|
|
|
" 'driver',\n", |
|
|
|
|
|
" 'driver_name',\n", |
|
|
|
|
|
" 'epoch_validate',\n", |
|
|
|
|
|
" 'evaluate_batch_step_fn',\n", |
|
|
|
|
|
" 'evaluate_dataloaders',\n", |
|
|
|
|
|
" 'evaluate_every',\n", |
|
|
|
|
|
" 'evaluate_fn',\n", |
|
|
|
|
|
" 'evaluator',\n", |
|
|
|
|
|
" 'extract_loss_from_outputs',\n", |
|
|
|
|
|
" 'fp16',\n", |
|
|
|
|
|
" 'get_no_sync_context',\n", |
|
|
|
|
|
" 'global_forward_batches',\n", |
|
|
|
|
|
" 'has_checked_train_batch_loop',\n", |
|
|
|
|
|
" 'input_mapping',\n", |
|
|
|
|
|
" 'kwargs',\n", |
|
|
|
|
|
" 'larger_better',\n", |
|
|
|
|
|
" 'load',\n", |
|
|
|
|
|
" 'load_model',\n", |
|
|
|
|
|
" 'marker',\n", |
|
|
|
|
|
" 'metrics',\n", |
|
|
|
|
|
" 'model',\n", |
|
|
|
|
|
" 'model_device',\n", |
|
|
|
|
|
" 'monitor',\n", |
|
|
|
|
|
" 'move_data_to_device',\n", |
|
|
|
|
|
" 'n_epochs',\n", |
|
|
|
|
|
" 'num_batches_per_epoch',\n", |
|
|
|
|
|
" 'on',\n", |
|
|
|
|
|
" 'on_after_backward',\n", |
|
|
|
|
|
" 'on_after_optimizers_step',\n", |
|
|
|
|
|
" 'on_after_trainer_initialized',\n", |
|
|
|
|
|
" 'on_after_zero_grad',\n", |
|
|
|
|
|
" 'on_before_backward',\n", |
|
|
|
|
|
" 'on_before_optimizers_step',\n", |
|
|
|
|
|
" 'on_before_zero_grad',\n", |
|
|
|
|
|
" 'on_exception',\n", |
|
|
|
|
|
" 'on_fetch_data_begin',\n", |
|
|
|
|
|
" 'on_fetch_data_end',\n", |
|
|
|
|
|
" 'on_load_checkpoint',\n", |
|
|
|
|
|
" 'on_load_model',\n", |
|
|
|
|
|
" 'on_sanity_check_begin',\n", |
|
|
|
|
|
" 'on_sanity_check_end',\n", |
|
|
|
|
|
" 'on_save_checkpoint',\n", |
|
|
|
|
|
" 'on_save_model',\n", |
|
|
|
|
|
" 'on_train_batch_begin',\n", |
|
|
|
|
|
" 'on_train_batch_end',\n", |
|
|
|
|
|
" 'on_train_begin',\n", |
|
|
|
|
|
" 'on_train_end',\n", |
|
|
|
|
|
" 'on_train_epoch_begin',\n", |
|
|
|
|
|
" 'on_train_epoch_end',\n", |
|
|
|
|
|
" 'on_validate_begin',\n", |
|
|
|
|
|
" 'on_validate_end',\n", |
|
|
|
|
|
" 'optimizers',\n", |
|
|
|
|
|
" 'output_mapping',\n", |
|
|
|
|
|
" 'run',\n", |
|
|
|
|
|
" 'save',\n", |
|
|
|
|
|
" 'save_model',\n", |
|
|
|
|
|
" 'set_grad_to_none',\n", |
|
|
|
|
|
" 'state',\n", |
|
|
|
|
|
" 'step',\n", |
|
|
|
|
|
" 'step_validate',\n", |
|
|
|
|
|
" 'total_batches',\n", |
|
|
|
|
|
" 'train_batch_loop',\n", |
|
|
|
|
|
" 'train_dataloader',\n", |
|
|
|
|
|
" 'train_fn',\n", |
|
|
|
|
|
" 'train_step',\n", |
|
|
|
|
|
" 'trainer_state',\n", |
|
|
|
|
|
" 'zero_grad']" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
"execution_count": 5, |
|
|
|
|
|
"metadata": {}, |
|
|
|
|
|
"output_type": "execute_result" |
|
|
|
|
|
} |
|
|
} |
|
|
], |
|
|
], |
|
|
"source": [ |
|
|
"source": [ |
|
|
"from fastNLP import Trainer\n", |
|
|
"from fastNLP import Trainer\n", |
|
|
"\n", |
|
|
"\n", |
|
|
"# 定义一个 Trainer\n", |
|
|
|
|
|
"trainer = Trainer(\n", |
|
|
"trainer = Trainer(\n", |
|
|
" model=model,\n", |
|
|
" model=model,\n", |
|
|
" driver=\"torch\", # 使用 pytorch 进行训练\n", |
|
|
|
|
|
" device=0, # 使用 GPU:0\n", |
|
|
|
|
|
|
|
|
" driver=\"torch\",\n", |
|
|
|
|
|
" device='cuda',\n", |
|
|
" train_dataloader=train_dataloader,\n", |
|
|
" train_dataloader=train_dataloader,\n", |
|
|
" optimizers=optimizer,\n", |
|
|
" optimizers=optimizer,\n", |
|
|
" n_epochs=10, # 训练 40 个 epoch\n", |
|
|
|
|
|
" progress_bar=\"rich\"\n", |
|
|
|
|
|
")\n", |
|
|
|
|
|
"dir(trainer)" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"execution_count": 8, |
|
|
|
|
|
"id": "f8fe9c32", |
|
|
|
|
|
"metadata": {}, |
|
|
|
|
|
"outputs": [ |
|
|
|
|
|
{ |
|
|
|
|
|
"name": "stdout", |
|
|
|
|
|
"output_type": "stream", |
|
|
|
|
|
"text": [ |
|
|
|
|
|
"FullArgSpec(args=['self', 'num_train_batch_per_epoch', 'num_eval_batch_per_dl', 'num_eval_sanity_batch', 'resume_from', 'resume_training', 'catch_KeyboardInterrupt'], varargs=None, varkw=None, defaults=(-1, -1, 2, None, True, None), kwonlyargs=[], kwonlydefaults=None, annotations={'num_train_batch_per_epoch': <class 'int'>, 'num_eval_batch_per_dl': <class 'int'>, 'num_eval_sanity_batch': <class 'int'>, 'resume_from': <class 'str'>, 'resume_training': <class 'bool'>})\n" |
|
|
|
|
|
] |
|
|
|
|
|
} |
|
|
|
|
|
], |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"import inspect \n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"print(inspect.getfullargspec(trainer.run))" |
|
|
|
|
|
|
|
|
" n_epochs=10, # 设定迭代轮数 \n", |
|
|
|
|
|
" progress_bar=\"auto\" # 设定进度条格式\n", |
|
|
|
|
|
")" |
|
|
] |
|
|
] |
|
|
}, |
|
|
}, |
|
|
{ |
|
|
{ |
|
@@ -517,16 +474,20 @@ |
|
|
"id": "6e202d6e", |
|
|
"id": "6e202d6e", |
|
|
"metadata": {}, |
|
|
"metadata": {}, |
|
|
"source": [ |
|
|
"source": [ |
|
|
"没有问题,那么开始真正的训练!" |
|
|
|
|
|
|
|
|
"通过使用`Trainer`类的`run`函数,进行训练\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"  其中,可以通过参数`num_train_batch_per_epoch`决定每个`epoch`运行多少个`batch`后停止,默认全部\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"  此外,可以通过`inspect.getfullargspec(trainer.run)`查询`run`函数的全部参数列表" |
|
|
] |
|
|
] |
|
|
}, |
|
|
}, |
|
|
{ |
|
|
{ |
|
|
"cell_type": "code", |
|
|
"cell_type": "code", |
|
|
"execution_count": 9, |
|
|
|
|
|
|
|
|
"execution_count": 7, |
|
|
"id": "ba047ead", |
|
|
"id": "ba047ead", |
|
|
"metadata": { |
|
|
"metadata": { |
|
|
"pycharm": { |
|
|
"pycharm": { |
|
|
"is_executing": false |
|
|
|
|
|
|
|
|
"is_executing": true |
|
|
} |
|
|
} |
|
|
}, |
|
|
}, |
|
|
"outputs": [ |
|
|
"outputs": [ |
|
@@ -585,29 +546,27 @@ |
|
|
"trainer.run()" |
|
|
"trainer.run()" |
|
|
] |
|
|
] |
|
|
}, |
|
|
}, |
|
|
{ |
|
|
|
|
|
"cell_type": "markdown", |
|
|
|
|
|
"id": "eb8ca6cf", |
|
|
|
|
|
"metadata": {}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"## 3. 使用 evaluator 评测模型" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
{ |
|
|
"cell_type": "markdown", |
|
|
"cell_type": "markdown", |
|
|
"id": "c16c5fa4", |
|
|
"id": "c16c5fa4", |
|
|
"metadata": {}, |
|
|
"metadata": {}, |
|
|
"source": [ |
|
|
"source": [ |
|
|
"模型训练好了我们开始使用 Evaluator 进行评测,查看效果怎么样吧。" |
|
|
|
|
|
|
|
|
"通过从`fastNLP`库中导入`Evaluator`类,初始化`evaluator`实例,对模型进行评测\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"  需要导入预先定义好的模型`model`、对应的数据加载模块`evaluate_dataloader`\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"  需要注意的是评测方法`metrics`,设定为形如`{'acc': fastNLP.core.metrics.Accuracy()}`的字典\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"  类似地,也可以通过`progress_bar`限定进度条格式,默认为`\"auto\"`" |
|
|
] |
|
|
] |
|
|
}, |
|
|
}, |
|
|
{ |
|
|
{ |
|
|
"cell_type": "code", |
|
|
"cell_type": "code", |
|
|
"execution_count": 10, |
|
|
|
|
|
|
|
|
"execution_count": 8, |
|
|
"id": "1c6b6b36", |
|
|
"id": "1c6b6b36", |
|
|
"metadata": { |
|
|
"metadata": { |
|
|
"pycharm": { |
|
|
"pycharm": { |
|
|
"is_executing": false |
|
|
|
|
|
|
|
|
"is_executing": true |
|
|
} |
|
|
} |
|
|
}, |
|
|
}, |
|
|
"outputs": [], |
|
|
"outputs": [], |
|
@@ -617,100 +576,32 @@ |
|
|
"\n", |
|
|
"\n", |
|
|
"evaluator = Evaluator(\n", |
|
|
"evaluator = Evaluator(\n", |
|
|
" model=model,\n", |
|
|
" model=model,\n", |
|
|
" driver=trainer.driver, # 使用 trainer 已经启动的 driver;\n", |
|
|
|
|
|
|
|
|
" driver=trainer.driver, # 需要使用 trainer 已经启动的 driver\n", |
|
|
" device=None,\n", |
|
|
" device=None,\n", |
|
|
" dataloaders=evaluate_dataloader,\n", |
|
|
" dataloaders=evaluate_dataloader,\n", |
|
|
" metrics={'acc': Accuracy()} # 注意这里一定得是一个字典;\n", |
|
|
|
|
|
|
|
|
" metrics={'acc': Accuracy()} # 需要严格使用此种形式的字典\n", |
|
|
")" |
|
|
")" |
|
|
] |
|
|
] |
|
|
}, |
|
|
}, |
|
|
{ |
|
|
{ |
|
|
"cell_type": "code", |
|
|
|
|
|
"execution_count": 11, |
|
|
|
|
|
"id": "257061df", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"scrolled": true |
|
|
|
|
|
}, |
|
|
|
|
|
"outputs": [ |
|
|
|
|
|
{ |
|
|
|
|
|
"data": { |
|
|
|
|
|
"text/plain": [ |
|
|
|
|
|
"['__annotations__',\n", |
|
|
|
|
|
" '__class__',\n", |
|
|
|
|
|
" '__delattr__',\n", |
|
|
|
|
|
" '__dict__',\n", |
|
|
|
|
|
" '__dir__',\n", |
|
|
|
|
|
" '__doc__',\n", |
|
|
|
|
|
" '__eq__',\n", |
|
|
|
|
|
" '__format__',\n", |
|
|
|
|
|
" '__ge__',\n", |
|
|
|
|
|
" '__getattribute__',\n", |
|
|
|
|
|
" '__gt__',\n", |
|
|
|
|
|
" '__hash__',\n", |
|
|
|
|
|
" '__init__',\n", |
|
|
|
|
|
" '__init_subclass__',\n", |
|
|
|
|
|
" '__le__',\n", |
|
|
|
|
|
" '__lt__',\n", |
|
|
|
|
|
" '__module__',\n", |
|
|
|
|
|
" '__ne__',\n", |
|
|
|
|
|
" '__new__',\n", |
|
|
|
|
|
" '__reduce__',\n", |
|
|
|
|
|
" '__reduce_ex__',\n", |
|
|
|
|
|
" '__repr__',\n", |
|
|
|
|
|
" '__setattr__',\n", |
|
|
|
|
|
" '__sizeof__',\n", |
|
|
|
|
|
" '__str__',\n", |
|
|
|
|
|
" '__subclasshook__',\n", |
|
|
|
|
|
" '__weakref__',\n", |
|
|
|
|
|
" '_dist_sampler',\n", |
|
|
|
|
|
" '_evaluate_batch_loop',\n", |
|
|
|
|
|
" '_evaluate_step',\n", |
|
|
|
|
|
" '_evaluate_step_signature_fn',\n", |
|
|
|
|
|
" '_metric_wrapper',\n", |
|
|
|
|
|
" '_metrics',\n", |
|
|
|
|
|
" 'dataloaders',\n", |
|
|
|
|
|
" 'device',\n", |
|
|
|
|
|
" 'driver',\n", |
|
|
|
|
|
" 'evaluate_batch_loop',\n", |
|
|
|
|
|
" 'evaluate_batch_step_fn',\n", |
|
|
|
|
|
" 'evaluate_fn',\n", |
|
|
|
|
|
" 'evaluate_step',\n", |
|
|
|
|
|
" 'finally_progress_bar',\n", |
|
|
|
|
|
" 'get_dataloader_metric',\n", |
|
|
|
|
|
" 'input_mapping',\n", |
|
|
|
|
|
" 'metrics',\n", |
|
|
|
|
|
" 'metrics_wrapper',\n", |
|
|
|
|
|
" 'model',\n", |
|
|
|
|
|
" 'model_use_eval_mode',\n", |
|
|
|
|
|
" 'move_data_to_device',\n", |
|
|
|
|
|
" 'output_mapping',\n", |
|
|
|
|
|
" 'progress_bar',\n", |
|
|
|
|
|
" 'remove_progress_bar',\n", |
|
|
|
|
|
" 'reset',\n", |
|
|
|
|
|
" 'run',\n", |
|
|
|
|
|
" 'separator',\n", |
|
|
|
|
|
" 'start_progress_bar',\n", |
|
|
|
|
|
" 'update',\n", |
|
|
|
|
|
" 'update_progress_bar',\n", |
|
|
|
|
|
" 'verbose']" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
"execution_count": 11, |
|
|
|
|
|
"metadata": {}, |
|
|
|
|
|
"output_type": "execute_result" |
|
|
|
|
|
} |
|
|
|
|
|
], |
|
|
|
|
|
|
|
|
"cell_type": "markdown", |
|
|
|
|
|
"id": "8157bb9b", |
|
|
|
|
|
"metadata": {}, |
|
|
"source": [ |
|
|
"source": [ |
|
|
"dir(evaluator)" |
|
|
|
|
|
|
|
|
"通过使用`Evaluator`类的`run`函数,进行评测\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"  其中,可以通过参数`num_eval_batch_per_dl`决定每个`evaluate_dataloader`运行多少个`batch`停止,默认全部\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"  最终,输出形如`{'acc#acc': acc}`的字典,中间的进度条会在运行结束后丢弃掉(???)" |
|
|
] |
|
|
] |
|
|
}, |
|
|
}, |
|
|
{ |
|
|
{ |
|
|
"cell_type": "code", |
|
|
"cell_type": "code", |
|
|
"execution_count": 12, |
|
|
|
|
|
|
|
|
"execution_count": 9, |
|
|
"id": "f7cb0165", |
|
|
"id": "f7cb0165", |
|
|
"metadata": { |
|
|
"metadata": { |
|
|
"pycharm": { |
|
|
"pycharm": { |
|
|
"is_executing": false |
|
|
|
|
|
|
|
|
"is_executing": true |
|
|
} |
|
|
} |
|
|
}, |
|
|
}, |
|
|
"outputs": [ |
|
|
"outputs": [ |
|
@@ -750,11 +641,11 @@ |
|
|
{ |
|
|
{ |
|
|
"data": { |
|
|
"data": { |
|
|
"text/html": [ |
|
|
"text/html": [ |
|
|
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'acc#acc'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.3</span><span style=\"font-weight: bold\">}</span>\n", |
|
|
|
|
|
|
|
|
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'acc#acc'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.43</span><span style=\"font-weight: bold\">}</span>\n", |
|
|
"</pre>\n" |
|
|
"</pre>\n" |
|
|
], |
|
|
], |
|
|
"text/plain": [ |
|
|
"text/plain": [ |
|
|
"\u001b[1m{\u001b[0m\u001b[32m'acc#acc'\u001b[0m: \u001b[1;36m0.3\u001b[0m\u001b[1m}\u001b[0m\n" |
|
|
|
|
|
|
|
|
"\u001b[1m{\u001b[0m\u001b[32m'acc#acc'\u001b[0m: \u001b[1;36m0.43\u001b[0m\u001b[1m}\u001b[0m\n" |
|
|
] |
|
|
] |
|
|
}, |
|
|
}, |
|
|
"metadata": {}, |
|
|
"metadata": {}, |
|
@@ -763,10 +654,10 @@ |
|
|
{ |
|
|
{ |
|
|
"data": { |
|
|
"data": { |
|
|
"text/plain": [ |
|
|
"text/plain": [ |
|
|
"{'acc#acc': 0.3}" |
|
|
|
|
|
|
|
|
"{'acc#acc': 0.43}" |
|
|
] |
|
|
] |
|
|
}, |
|
|
}, |
|
|
"execution_count": 12, |
|
|
|
|
|
|
|
|
"execution_count": 9, |
|
|
"metadata": {}, |
|
|
"metadata": {}, |
|
|
"output_type": "execute_result" |
|
|
"output_type": "execute_result" |
|
|
} |
|
|
} |
|
@@ -780,39 +671,37 @@ |
|
|
"id": "dd9f68fa", |
|
|
"id": "dd9f68fa", |
|
|
"metadata": {}, |
|
|
"metadata": {}, |
|
|
"source": [ |
|
|
"source": [ |
|
|
"## 4. 在 trainer 中加入 metric 来自动评测;" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "markdown", |
|
|
|
|
|
"id": "ca97c9a4", |
|
|
|
|
|
"metadata": {}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"现在我们尝试在训练过程中进行评测。" |
|
|
|
|
|
|
|
|
"### 3.2 trainer 内部初始化的 evaluator \n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"通过在初始化`trainer`实例时加入`evaluate_dataloaders`和`metrics`,可以实现在训练过程中进行评测\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"  通过`progress_bar`同时设定训练和评估进度条格式,训练结束后进度条会不显示(???)\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"  **通过`evaluate_every`设定评估频率**,可以为负数、正数或者函数:\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"    **为负数时**,**表示每隔几个`epoch`评估一次**;**为正数时**,**则表示每隔几个`batch`评估一次**" |
|
|
] |
|
|
] |
|
|
}, |
|
|
}, |
|
|
{ |
|
|
{ |
|
|
"cell_type": "code", |
|
|
"cell_type": "code", |
|
|
"execution_count": 13, |
|
|
|
|
|
|
|
|
"execution_count": 10, |
|
|
"id": "183c7d19", |
|
|
"id": "183c7d19", |
|
|
"metadata": { |
|
|
"metadata": { |
|
|
"pycharm": { |
|
|
"pycharm": { |
|
|
"is_executing": false |
|
|
|
|
|
|
|
|
"is_executing": true |
|
|
} |
|
|
} |
|
|
}, |
|
|
}, |
|
|
"outputs": [], |
|
|
"outputs": [], |
|
|
"source": [ |
|
|
"source": [ |
|
|
"# 重新定义一个 Trainer\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"trainer = Trainer(\n", |
|
|
"trainer = Trainer(\n", |
|
|
" model=model,\n", |
|
|
" model=model,\n", |
|
|
" driver=trainer.driver, # 因为我们是在同一脚本中,因此这里的 driver 同样需要重用;\n", |
|
|
|
|
|
|
|
|
" driver=trainer.driver, # 因为是在同个脚本中,这里的 driver 同样需要重用\n", |
|
|
" train_dataloader=train_dataloader,\n", |
|
|
" train_dataloader=train_dataloader,\n", |
|
|
" evaluate_dataloaders=evaluate_dataloader,\n", |
|
|
" evaluate_dataloaders=evaluate_dataloader,\n", |
|
|
" metrics={'acc': Accuracy()},\n", |
|
|
" metrics={'acc': Accuracy()},\n", |
|
|
" optimizers=optimizer,\n", |
|
|
" optimizers=optimizer,\n", |
|
|
" n_epochs=10, # 训练 40 个 epoch;\n", |
|
|
|
|
|
" evaluate_every=-1, # 表示每一个 epoch 的结束会进行 evaluate;\n", |
|
|
|
|
|
|
|
|
" n_epochs=10, \n", |
|
|
|
|
|
" evaluate_every=-1, # 表示每个 epoch 的结束进行评估\n", |
|
|
")" |
|
|
")" |
|
|
] |
|
|
] |
|
|
}, |
|
|
}, |
|
@@ -821,16 +710,18 @@ |
|
|
"id": "714cc404", |
|
|
"id": "714cc404", |
|
|
"metadata": {}, |
|
|
"metadata": {}, |
|
|
"source": [ |
|
|
"source": [ |
|
|
"再次训练。" |
|
|
|
|
|
|
|
|
"通过使用`Trainer`类的`run`函数,进行训练\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"  还可以通过参数`num_eval_sanity_batch`决定每次训练前运行多少个`evaluate_batch`进行评测,默认为2" |
|
|
] |
|
|
] |
|
|
}, |
|
|
}, |
|
|
{ |
|
|
{ |
|
|
"cell_type": "code", |
|
|
"cell_type": "code", |
|
|
"execution_count": 14, |
|
|
|
|
|
|
|
|
"execution_count": 11, |
|
|
"id": "2e4daa2c", |
|
|
"id": "2e4daa2c", |
|
|
"metadata": { |
|
|
"metadata": { |
|
|
"pycharm": { |
|
|
"pycharm": { |
|
|
"is_executing": false |
|
|
|
|
|
|
|
|
"is_executing": true |
|
|
} |
|
|
} |
|
|
}, |
|
|
}, |
|
|
"outputs": [ |
|
|
"outputs": [ |
|
@@ -884,96 +775,6 @@ |
|
|
"source": [ |
|
|
"source": [ |
|
|
"trainer.run()" |
|
|
"trainer.run()" |
|
|
] |
|
|
] |
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"execution_count": 15, |
|
|
|
|
|
"id": "eabda5eb", |
|
|
|
|
|
"metadata": {}, |
|
|
|
|
|
"outputs": [], |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"evaluator = Evaluator(\n", |
|
|
|
|
|
" model=model,\n", |
|
|
|
|
|
" driver=trainer.driver, # 使用 trainer 已经启动的 driver;\n", |
|
|
|
|
|
" dataloaders=evaluate_dataloader,\n", |
|
|
|
|
|
" metrics={'acc': Accuracy()} # 注意这里一定得是一个字典;\n", |
|
|
|
|
|
")" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"execution_count": 16, |
|
|
|
|
|
"id": "a310d157", |
|
|
|
|
|
"metadata": {}, |
|
|
|
|
|
"outputs": [ |
|
|
|
|
|
{ |
|
|
|
|
|
"data": { |
|
|
|
|
|
"text/html": [ |
|
|
|
|
|
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n" |
|
|
|
|
|
], |
|
|
|
|
|
"text/plain": [] |
|
|
|
|
|
}, |
|
|
|
|
|
"metadata": {}, |
|
|
|
|
|
"output_type": "display_data" |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"data": { |
|
|
|
|
|
"text/html": [ |
|
|
|
|
|
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n" |
|
|
|
|
|
], |
|
|
|
|
|
"text/plain": [] |
|
|
|
|
|
}, |
|
|
|
|
|
"metadata": {}, |
|
|
|
|
|
"output_type": "display_data" |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"data": { |
|
|
|
|
|
"text/html": [ |
|
|
|
|
|
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">\n", |
|
|
|
|
|
"</pre>\n" |
|
|
|
|
|
], |
|
|
|
|
|
"text/plain": [ |
|
|
|
|
|
"\n" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
"metadata": {}, |
|
|
|
|
|
"output_type": "display_data" |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"data": { |
|
|
|
|
|
"text/html": [ |
|
|
|
|
|
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'acc#acc'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.5</span><span style=\"font-weight: bold\">}</span>\n", |
|
|
|
|
|
"</pre>\n" |
|
|
|
|
|
], |
|
|
|
|
|
"text/plain": [ |
|
|
|
|
|
"\u001b[1m{\u001b[0m\u001b[32m'acc#acc'\u001b[0m: \u001b[1;36m0.5\u001b[0m\u001b[1m}\u001b[0m\n" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
"metadata": {}, |
|
|
|
|
|
"output_type": "display_data" |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"data": { |
|
|
|
|
|
"text/plain": [ |
|
|
|
|
|
"{'acc#acc': 0.5}" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
"execution_count": 16, |
|
|
|
|
|
"metadata": {}, |
|
|
|
|
|
"output_type": "execute_result" |
|
|
|
|
|
} |
|
|
|
|
|
], |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"evaluator.run()" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"id": "f1ef78f0", |
|
|
|
|
|
"metadata": {}, |
|
|
|
|
|
"outputs": [], |
|
|
|
|
|
"source": [] |
|
|
|
|
|
} |
|
|
} |
|
|
], |
|
|
], |
|
|
"metadata": { |
|
|
"metadata": { |
|
|