# 使用Modules和Models快速搭建自定义模型

modules 和 models 用于构建 fastNLP 所需的神经网络模型,它可以和 torch.nn 中的模型一起使用。 下面我们会分三节介绍编写构建模型的具体方法。


我们首先准备好和上篇教程一样的基础实验代码

In [2]:
from fastNLP.io import SST2Pipe
from fastNLP import Trainer, CrossEntropyLoss, AccuracyMetric
import torch

databundle = SST2Pipe().process_from_file()
vocab = databundle.get_vocab('words')
train_data = databundle.get_dataset('train')[:5000]
train_data, test_data = train_data.split(0.015)
dev_data = databundle.get_dataset('dev')

loss = CrossEntropyLoss()
metric = AccuracyMetric()
device = 0 if torch.cuda.is_available() else 'cpu'

## 使用 models 中的模型

fastNLP 在 models 模块中内置了如 CNNText 、 SeqLabeling 等完整的模型,以供用户直接使用。 以文本分类的任务为例,我们从 models 中导入 CNNText 模型,用它进行训练。

In [3]:
from fastNLP.models import CNNText

model_cnn = CNNText((len(vocab),100), num_classes=2, dropout=0.1)

trainer = Trainer(train_data=train_data, dev_data=dev_data, metrics=metric,
 loss=loss, device=device, model=model_cnn)
trainer.train()

input fields after batch(if batch size is 2):
	words: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2, 41]) 
	seq_len: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) 
target fields after batch(if batch size is 2):
	target: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) 

training epochs started 2020-02-28-00-56-04


HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=1540.0), HTML(value='')), layout=Layout(d…

HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…

Evaluate data in 0.22 seconds!
Evaluation on dev at Epoch 1/10. Step:154/1540: 
AccuracyMetric: acc=0.760321



HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…

Evaluate data in 0.29 seconds!
Evaluation on dev at Epoch 2/10. Step:308/1540: 
AccuracyMetric: acc=0.727064



HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…

Evaluate data in 0.48 seconds!
Evaluation on dev at Epoch 3/10. Step:462/1540: 
AccuracyMetric: acc=0.758028



HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…

Evaluate data in 0.24 seconds!
Evaluation on dev at Epoch 4/10. Step:616/1540: 
AccuracyMetric: acc=0.759174



HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…

Evaluate data in 0.47 seconds!
Evaluation on dev at Epoch 5/10. Step:770/1540: 
AccuracyMetric: acc=0.743119



HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…

Evaluate data in 0.22 seconds!
Evaluation on dev at Epoch 6/10. Step:924/1540: 
AccuracyMetric: acc=0.756881



HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…

Evaluate data in 0.21 seconds!
Evaluation on dev at Epoch 7/10. Step:1078/1540: 
AccuracyMetric: acc=0.752294



HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…

Evaluate data in 0.21 seconds!
Evaluation on dev at Epoch 8/10. Step:1232/1540: 
AccuracyMetric: acc=0.756881



HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…

Evaluate data in 0.15 seconds!
Evaluation on dev at Epoch 9/10. Step:1386/1540: 
AccuracyMetric: acc=0.75344



HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…

Evaluate data in 0.12 seconds!
Evaluation on dev at Epoch 10/10. Step:1540/1540: 
AccuracyMetric: acc=0.752294


In Epoch:1/Step:154, got best dev performance:
AccuracyMetric: acc=0.760321
Reloaded the best model.


{'best_eval': {'AccuracyMetric': {'acc': 0.760321}},
 'best_epoch': 1,
 'best_step': 154,
 'seconds': 29.3}

在 iPython 环境输入 model_cnn ,我们可以看到 model_cnn 的网络结构

In [4]:
model_cnn

CNNText(
 (embed): Embedding(
 (embed): Embedding(16292, 100)
 (dropout): Dropout(p=0.0, inplace=False)
 )
 (conv_pool): ConvMaxpool(
 (convs): ModuleList(
 (0): Conv1d(100, 30, kernel_size=(1,), stride=(1,), bias=False)
 (1): Conv1d(100, 40, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
 (2): Conv1d(100, 50, kernel_size=(5,), stride=(1,), padding=(2,), bias=False)
 )
 )
 (dropout): Dropout(p=0.1, inplace=False)
 (fc): Linear(in_features=120, out_features=2, bias=True)
)

## 使用 nn.torch 编写模型

FastNLP 完全支持使用 pyTorch 编写的模型,但与 pyTorch 中编写模型的常见方法不同, 用于 fastNLP 的模型中 forward 函数需要返回一个字典,字典中至少需要包含 pred 这个字段。

下面是使用 pyTorch 中的 torch.nn 模块编写的文本分类,注意观察代码中标注的向量维度。 由于 pyTorch 使用了约定俗成的维度设置,使得 forward 中需要多次处理维度顺序

In [5]:
import torch
import torch.nn as nn

class LSTMText(nn.Module):
 def __init__(self, vocab_size, embedding_dim, output_dim, hidden_dim=64, num_layers=2, dropout=0.5):
 super().__init__()

 self.embedding = nn.Embedding(vocab_size, embedding_dim)
 self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=num_layers, bidirectional=True, dropout=dropout)
 self.fc = nn.Linear(hidden_dim * 2, output_dim)
 self.dropout = nn.Dropout(dropout)

 def forward(self, words):
 # (input) words : (batch_size, seq_len)
 words = words.permute(1,0)
 # words : (seq_len, batch_size)

 embedded = self.dropout(self.embedding(words))
 # embedded : (seq_len, batch_size, embedding_dim)
 output, (hidden, cell) = self.lstm(embedded)
 # output: (seq_len, batch_size, hidden_dim * 2)
 # hidden: (num_layers * 2, batch_size, hidden_dim)
 # cell: (num_layers * 2, batch_size, hidden_dim)

 hidden = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
 hidden = self.dropout(hidden)
 # hidden: (batch_size, hidden_dim * 2)

 pred = self.fc(hidden.squeeze(0))
 # result: (batch_size, output_dim)
 return {"pred":pred}

我们同样可以在 iPython 环境中查看这个模型的网络结构

In [6]:
model_lstm = LSTMText(len(vocab), 100, 2)
model_lstm 

LSTMText(
 (embedding): Embedding(16292, 100)
 (lstm): LSTM(100, 64, num_layers=2, dropout=0.5, bidirectional=True)
 (fc): Linear(in_features=128, out_features=2, bias=True)
 (dropout): Dropout(p=0.5, inplace=False)
)

In [7]:
trainer = Trainer(train_data=train_data, dev_data=dev_data, metrics=metric,
 loss=loss, device=device, model=model_lstm)
trainer.train()

input fields after batch(if batch size is 2):
	words: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2, 41]) 
	seq_len: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) 
target fields after batch(if batch size is 2):
	target: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) 

training epochs started 2020-02-28-00-56-34


HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=1540.0), HTML(value='')), layout=Layout(d…

HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…

Evaluate data in 0.36 seconds!
Evaluation on dev at Epoch 1/10. Step:154/1540: 
AccuracyMetric: acc=0.59289



HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…

Evaluate data in 0.35 seconds!
Evaluation on dev at Epoch 2/10. Step:308/1540: 
AccuracyMetric: acc=0.674312



HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…

Evaluate data in 0.21 seconds!
Evaluation on dev at Epoch 3/10. Step:462/1540: 
AccuracyMetric: acc=0.724771



HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…

Evaluate data in 0.4 seconds!
Evaluation on dev at Epoch 4/10. Step:616/1540: 
AccuracyMetric: acc=0.748853



HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…

Evaluate data in 0.24 seconds!
Evaluation on dev at Epoch 5/10. Step:770/1540: 
AccuracyMetric: acc=0.756881



HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…

Evaluate data in 0.29 seconds!
Evaluation on dev at Epoch 6/10. Step:924/1540: 
AccuracyMetric: acc=0.741972



HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…

Evaluate data in 0.32 seconds!
Evaluation on dev at Epoch 7/10. Step:1078/1540: 
AccuracyMetric: acc=0.754587



HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…

Evaluate data in 0.24 seconds!
Evaluation on dev at Epoch 8/10. Step:1232/1540: 
AccuracyMetric: acc=0.756881



HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…

Evaluate data in 0.28 seconds!
Evaluation on dev at Epoch 9/10. Step:1386/1540: 
AccuracyMetric: acc=0.740826



HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…

Evaluate data in 0.23 seconds!
Evaluation on dev at Epoch 10/10. Step:1540/1540: 
AccuracyMetric: acc=0.751147


In Epoch:5/Step:770, got best dev performance:
AccuracyMetric: acc=0.756881
Reloaded the best model.


{'best_eval': {'AccuracyMetric': {'acc': 0.756881}},
 'best_epoch': 5,
 'best_step': 770,
 'seconds': 45.69}

## 使用 modules 编写模型

下面我们使用 fastNLP.modules 中的组件来构建同样的网络。由于 fastNLP 统一把 batch_size 放在第一维, 在编写代码的过程中会有一定的便利。

In [8]:
from fastNLP.modules import LSTM, MLP
from fastNLP.embeddings import Embedding


class MyText(nn.Module):
 def __init__(self, vocab_size, embedding_dim, output_dim, hidden_dim=64, num_layers=2, dropout=0.5):
 super().__init__()

 self.embedding = Embedding((vocab_size, embedding_dim))
 self.lstm = LSTM(embedding_dim, hidden_dim, num_layers=num_layers, bidirectional=True)
 self.mlp = MLP([hidden_dim*2,output_dim], dropout=dropout)

 def forward(self, words):
 embedded = self.embedding(words)
 _,(hidden,_) = self.lstm(embedded)
 pred = self.mlp(torch.cat((hidden[-1],hidden[-2]),dim=1))
 return {"pred":pred}
 
model_text = MyText(len(vocab), 100, 2)
model_text

MyText(
 (embedding): Embedding(
 (embed): Embedding(16292, 100)
 (dropout): Dropout(p=0.0, inplace=False)
 )
 (lstm): LSTM(
 (lstm): LSTM(100, 64, num_layers=2, batch_first=True, bidirectional=True)
 )
 (mlp): MLP(
 (hiddens): ModuleList()
 (output): Linear(in_features=128, out_features=2, bias=True)
 (dropout): Dropout(p=0.5, inplace=False)
 )
)

In [None]:
trainer = Trainer(train_data=train_data, dev_data=dev_data, metrics=metric,
 loss=loss, device=device, model=model_lstm)
trainer.train()

input fields after batch(if batch size is 2):
	words: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2, 41]) 
	seq_len: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) 
target fields after batch(if batch size is 2):
	target: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) 

training epochs started 2020-02-28-00-57-19


HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=1540.0), HTML(value='')), layout=Layout(d…

HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…

Evaluate data in 0.38 seconds!
Evaluation on dev at Epoch 1/10. Step:154/1540: 
AccuracyMetric: acc=0.767202



HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…

Evaluate data in 0.22 seconds!
Evaluation on dev at Epoch 2/10. Step:308/1540: 
AccuracyMetric: acc=0.743119

