From bbab238bb0cb0fff0a4aac33f1e5a29f53afe3c0 Mon Sep 17 00:00:00 2001
From: ChenXin
Date: Thu, 9 May 2019 15:40:25 +0800
Subject: [PATCH] Modify documentation format
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 fastNLP/models/sequence_labeling.py | 109 ++++++++++++++--------------
 1 file changed, 54 insertions(+), 55 deletions(-)

diff --git a/fastNLP/models/sequence_labeling.py b/fastNLP/models/sequence_labeling.py
index 880bd8a8..015ae24a 100644
--- a/fastNLP/models/sequence_labeling.py
+++ b/fastNLP/models/sequence_labeling.py
@@ -7,28 +7,27 @@
 from ..modules.utils import seq_mask
 from ..core.const import Const as C
 from torch import nn
+
 class SeqLabeling(BaseModel):
     """
-    A basic sequence labeling model
+    A basic sequence labeling model.
+    Base class for sequence labeling. The structure is one Embedding layer, one LSTM (single layer, unidirectional), one fully connected layer, and one CRF layer.
+
+    :param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray init_embed: size of the Embedding (for a tuple(int, int),
+        the first int is vocab_size and the second int is embed_dim); if a Tensor, Embedding, ndarray, etc. is given, that value is used to initialize the Embedding directly
+    :param int hidden_size: size of the LSTM hidden layer
+    :param int num_classes: total number of classes
     """
-
+
     def __init__(self, init_embed, hidden_size, num_classes):
-        """
-        Base class for sequence labeling. The structure is one Embedding layer, one LSTM (single layer, unidirectional), one fully connected layer, and one CRF layer.
-
-        :param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray init_embed: size of the Embedding (for a tuple(int, int),
-            the first int is vocab_size and the second int is embed_dim); if a Tensor, Embedding, ndarray, etc. is given, that value is used to initialize the Embedding directly
-        :param int hidden_size: size of the LSTM hidden layer
-        :param int num_classes: total number of classes
-        """
         super(SeqLabeling, self).__init__()
-
+
         self.Embedding = encoder.embedding.Embedding(init_embed)
         self.Rnn = encoder.lstm.LSTM(self.Embedding.embedding_dim, hidden_size)
         self.Linear = nn.Linear(hidden_size, num_classes)
         self.Crf = decoder.CRF.ConditionalRandomField(num_classes)
         self.mask = None
-
+
     def forward(self, words, seq_len, target):
         """
         :param torch.LongTensor words: [batch_size, max_len], indices of the sequence
@@ -40,15 +39,15 @@ class SeqLabeling(BaseModel):
         assert words.shape[0] == seq_len.shape[0]
         assert target.shape == words.shape
         self.mask = self._make_mask(words, seq_len)
-
+
         x = self.Embedding(words)
         # [batch_size, max_len, word_emb_dim]
-        x,_ = self.Rnn(x, seq_len)
+        x, _ = self.Rnn(x, seq_len)
         # [batch_size, max_len, hidden_size * direction]
         x = self.Linear(x)
         # [batch_size, max_len, num_classes]
         return {C.LOSS: self._internal_loss(x, target)}
-
+
     def predict(self, words, seq_len):
         """
         Used at prediction time.
@@ -58,7 +57,7 @@ class SeqLabeling(BaseModel):
         :return: {'pred': xx}, [batch_size, max_len]
         """
         self.mask = self._make_mask(words, seq_len)
-
+
         x = self.Embedding(words)
         # [batch_size, max_len, word_emb_dim]
         x, _ = self.Rnn(x, seq_len)
@@ -67,7 +66,7 @@ class SeqLabeling(BaseModel):
         # [batch_size, max_len, num_classes]
         pred = self._decode(x)
         return {C.OUTPUT: pred}
-
+
     def _internal_loss(self, x, y):
         """
         Negative log likelihood loss.
@@ -82,14 +81,14 @@ class SeqLabeling(BaseModel):
         assert y.shape == self.mask.shape
         total_loss = self.Crf(x, y, self.mask)
         return torch.mean(total_loss)
-
+
     def _make_mask(self, x, seq_len):
         batch_size, max_len = x.size(0), x.size(1)
         mask = seq_mask(seq_len, max_len)
         mask = mask.view(batch_size, max_len)
         mask = mask.to(x).float()
         return mask
-
+
     def _decode(self, x):
         """
         :param torch.FloatTensor x: [batch_size, max_len, tag_size]
@@ -102,40 +101,40 @@
 class AdvSeqLabel(nn.Module):
     """
     A more sophisticated sequence labelling model. The structure is Embedding, LayerNorm, bidirectional LSTM (two layers), FC, LayerNorm, Dropout, FC, CRF.
+
+    :param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray init_embed: size of the Embedding (for a tuple(int, int),
+        the first int is vocab_size and the second int is embed_dim); if a Tensor, Embedding, ndarray, etc. is given, that value is used to initialize the Embedding directly
+    :param int hidden_size: size of the LSTM hidden layer
+    :param int num_classes: number of classes
+    :param float dropout: drop probability used inside the LSTM and in the Dropout layer
+    :param dict id2words: mapping from tag id to its tag word. Used during CRF decoding to rule out illegal transition sequences; for example, in the 'BMES' tagging scheme, 'S'
+        must not follow 'B'. Tags such as 'B-NN' are also supported, where the part before '-' indicates the tag type and the part after it is the concrete tag. In that case it not only guarantees
+        that 'B-NN' is not followed by 'S-NN', it also guarantees that 'B-NN' is not followed by any 'M-xx' (anything other than 'M-NN' or 'E-NN').
+    :param str encoding_type: supports "BIO", "BMES", "BEMSO"; only used when id2words is not None.
     """
-
+
     def __init__(self, init_embed, hidden_size, num_classes, dropout=0.3, id2words=None, encoding_type='bmes'):
-        """
-
-        :param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray init_embed: size of the Embedding (for a tuple(int, int),
-            the first int is vocab_size and the second int is embed_dim); if a Tensor, Embedding, ndarray, etc. is given, that value is used to initialize the Embedding directly
-        :param int hidden_size: size of the LSTM hidden layer
-        :param int num_classes: number of classes
-        :param float dropout: drop probability used inside the LSTM and in the Dropout layer
-        :param dict id2words: mapping from tag id to its tag word. Used during CRF decoding to rule out illegal transition sequences; for example, in the 'BMES' tagging scheme, 'S'
-            must not follow 'B'. Tags such as 'B-NN' are also supported, where the part before '-' indicates the tag type and the part after it is the concrete tag. In that case it not only guarantees
-            that 'B-NN' is not followed by 'S-NN', it also guarantees that 'B-NN' is not followed by any 'M-xx' (anything other than 'M-NN' or 'E-NN').
-        :param str encoding_type: supports "BIO", "BMES", "BEMSO"; only used when id2words is not None.
-        """
+
         super().__init__()
-
+
         self.Embedding = encoder.embedding.Embedding(init_embed)
         self.norm1 = torch.nn.LayerNorm(self.Embedding.embedding_dim)
-        self.Rnn = encoder.LSTM(input_size=self.Embedding.embedding_dim, hidden_size=hidden_size, num_layers=2, dropout=dropout,
-                                bidirectional=True, batch_first=True)
+        self.Rnn = encoder.LSTM(input_size=self.Embedding.embedding_dim, hidden_size=hidden_size, num_layers=2,
+                                dropout=dropout,
+                                bidirectional=True, batch_first=True)
         self.Linear1 = nn.Linear(hidden_size * 2, hidden_size * 2 // 3)
         self.norm2 = torch.nn.LayerNorm(hidden_size * 2 // 3)
         self.relu = torch.nn.LeakyReLU()
         self.drop = torch.nn.Dropout(dropout)
         self.Linear2 = nn.Linear(hidden_size * 2 // 3, num_classes)
-
+
         if id2words is None:
             self.Crf = decoder.CRF.ConditionalRandomField(num_classes, include_start_end_trans=False)
         else:
             self.Crf = decoder.CRF.ConditionalRandomField(num_classes, include_start_end_trans=False,
                                                           allowed_transitions=allowed_transitions(id2words,
-                                                                                             encoding_type=encoding_type))
-
+                                                                                                  encoding_type=encoding_type))
+
     def _decode(self, x):
         """
         :param torch.FloatTensor x: [batch_size, max_len, tag_size]
@@ -143,7 +142,7 @@ class AdvSeqLabel(nn.Module):
         """
         tag_seq, _ = self.Crf.viterbi_decode(x, self.mask)
         return tag_seq
-
+
     def _internal_loss(self, x, y):
         """
         Negative log likelihood loss.
@@ -158,14 +157,14 @@ class AdvSeqLabel(nn.Module):
         assert y.shape == self.mask.shape
         total_loss = self.Crf(x, y, self.mask)
         return torch.mean(total_loss)
-
+
     def _make_mask(self, x, seq_len):
         batch_size, max_len = x.size(0), x.size(1)
         mask = seq_mask(seq_len, max_len)
         mask = mask.view(batch_size, max_len)
         mask = mask.to(x).float()
         return mask
-
+
     def _forward(self, words, seq_len, target=None):
         """
         :param torch.LongTensor words: [batch_size, max_len]
@@ -174,24 +173,24 @@ class AdvSeqLabel(nn.Module):
         :return y: If truth is None, return list of [decode path(list)]. Used in testing and predicting.
                    If truth is not None, return loss, a scalar. Used in training.
         """
-
+
         words = words.long()
         seq_len = seq_len.long()
         self.mask = self._make_mask(words, seq_len)
-
+
         # seq_len = seq_len.long()
         target = target.long() if target is not None else None
-
+
         if next(self.parameters()).is_cuda:
             words = words.cuda()
             self.mask = self.mask.cuda()
-
+
         x = self.Embedding(words)
         x = self.norm1(x)
         # [batch_size, max_len, word_emb_dim]
-
+
         x, _ = self.Rnn(x, seq_len=seq_len)
-
+
         x = self.Linear1(x)
         x = self.norm2(x)
         x = self.relu(x)
@@ -201,22 +200,22 @@ class AdvSeqLabel(nn.Module):
             return {"loss": self._internal_loss(x, target)}
         else:
             return {"pred": self._decode(x)}
-
+
     def forward(self, words, seq_len, target):
         """
+
         :param torch.LongTensor words: [batch_size, max_len]
-        :param torch.LongTensor seq_len:[batch_size, ]
+        :param torch.LongTensor seq_len: [batch_size, ]
        :param torch.LongTensor target: [batch_size, max_len], the target values
-        :return torch.Tensor, a scalar loss
+        :return torch.Tensor: a scalar loss
         """
         return self._forward(words, seq_len, target)
-
+
     def predict(self, words, seq_len):
         """
-
+
         :param torch.LongTensor words: [batch_size, max_len]
-        :param torch.LongTensor seq_len:[batch_size, ]
-        :return {'pred':}, the value is a torch.LongTensor, [batch_size, max_len]
-
+        :param torch.LongTensor seq_len: [batch_size, ]
+        :return torch.LongTensor: [batch_size, max_len]
         """
         return self._forward(words, seq_len)
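
Below is a short, hypothetical usage sketch for the two models touched by this patch; it is not part of the commit. All sizes, tag names, and tensors are invented placeholders, the import path simply mirrors the file changed above (fastNLP/models/sequence_labeling.py), and the dictionary keys 'loss' and 'pred' follow the return values of forward/predict as they appear in the diff.

    # Hypothetical example, not part of the patch: every size, tag, and tensor here is made up.
    import torch
    from fastNLP.models.sequence_labeling import SeqLabeling, AdvSeqLabel

    batch_size, max_len, vocab_size, n_tags = 2, 7, 100, 4
    words = torch.randint(0, vocab_size, (batch_size, max_len)).long()   # token indices
    seq_len = torch.LongTensor([7, 5])                                   # true sequence lengths
    target = torch.randint(0, n_tags, (batch_size, max_len)).long()      # gold tag indices

    # SeqLabeling: a (vocab_size, embed_dim) tuple initializes the Embedding layer.
    model = SeqLabeling(init_embed=(vocab_size, 50), hidden_size=64, num_classes=n_tags)
    loss = model(words, seq_len, target)["loss"]      # training: returns {'loss': scalar}
    pred = model.predict(words, seq_len)["pred"]      # inference: returns {'pred': [batch_size, max_len]}

    # AdvSeqLabel: id2words plus encoding_type constrain CRF decoding to legal tag transitions.
    id2words = {0: "B-NN", 1: "M-NN", 2: "E-NN", 3: "S-NN"}
    adv = AdvSeqLabel(init_embed=(vocab_size, 50), hidden_size=128, num_classes=n_tags,
                      dropout=0.3, id2words=id2words, encoding_type="bmes")
    adv_loss = adv(words, seq_len, target)["loss"]
    adv_pred = adv.predict(words, seq_len)["pred"]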