
Modify documentation format

tags/v0.4.10
ChenXin 5 years ago
parent commit bbab238bb0
1 changed file with 54 additions and 55 deletions
fastNLP/models/sequence_labeling.py  (+54, -55)

@@ -7,28 +7,27 @@ from ..modules.utils import seq_mask
 from ..core.const import Const as C
 from torch import nn


 class SeqLabeling(BaseModel):
     """
-    A basic sequence labeling model
+    A basic sequence labeling model.
+    Base class for sequence labeling tasks. The structure is one Embedding layer, one LSTM (unidirectional, single layer), one fully connected layer, and one CRF.
+    :param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray init_embed: the Embedding specification (for a tuple(int, int),
+        the first int is the vocab size and the second the embedding dim); a Tensor, Embedding or ndarray is used directly to initialize the Embedding
+    :param int hidden_size: size of the LSTM hidden layer
+    :param int num_classes: total number of classes
     """

     def __init__(self, init_embed, hidden_size, num_classes):
-        """
-        Base class for sequence labeling tasks. The structure is one Embedding layer, one LSTM (unidirectional, single layer), one fully connected layer, and one CRF.
-
-        :param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray init_embed: the Embedding specification (for a tuple(int, int),
-            the first int is the vocab size and the second the embedding dim); a Tensor, Embedding or ndarray is used directly to initialize the Embedding
-        :param int hidden_size: size of the LSTM hidden layer
-        :param int num_classes: total number of classes
-        """
         super(SeqLabeling, self).__init__()
         self.Embedding = encoder.embedding.Embedding(init_embed)
         self.Rnn = encoder.lstm.LSTM(self.Embedding.embedding_dim, hidden_size)
         self.Linear = nn.Linear(hidden_size, num_classes)
         self.Crf = decoder.CRF.ConditionalRandomField(num_classes)
         self.mask = None

     def forward(self, words, seq_len, target):
         """
         :param torch.LongTensor words: [batch_size, max_len], token indices of the sequence
@@ -40,15 +39,15 @@ class SeqLabeling(BaseModel):
         assert words.shape[0] == seq_len.shape[0]
         assert target.shape == words.shape
         self.mask = self._make_mask(words, seq_len)
         x = self.Embedding(words)
         # [batch_size, max_len, word_emb_dim]
-        x,_ = self.Rnn(x, seq_len)
+        x, _ = self.Rnn(x, seq_len)
         # [batch_size, max_len, hidden_size * direction]
         x = self.Linear(x)
         # [batch_size, max_len, num_classes]
         return {C.LOSS: self._internal_loss(x, target)}

     def predict(self, words, seq_len):
         """
         Used at prediction time.
@@ -58,7 +57,7 @@ class SeqLabeling(BaseModel):
         :return: {'pred': xx}, [batch_size, max_len]
         """
         self.mask = self._make_mask(words, seq_len)
         x = self.Embedding(words)
         # [batch_size, max_len, word_emb_dim]
         x, _ = self.Rnn(x, seq_len)
@@ -67,7 +66,7 @@ class SeqLabeling(BaseModel):
         # [batch_size, max_len, num_classes]
         pred = self._decode(x)
         return {C.OUTPUT: pred}

     def _internal_loss(self, x, y):
         """
         Negative log likelihood loss.
@@ -82,14 +81,14 @@ class SeqLabeling(BaseModel):
         assert y.shape == self.mask.shape
         total_loss = self.Crf(x, y, self.mask)
         return torch.mean(total_loss)

     def _make_mask(self, x, seq_len):
         batch_size, max_len = x.size(0), x.size(1)
         mask = seq_mask(seq_len, max_len)
         mask = mask.view(batch_size, max_len)
         mask = mask.to(x).float()
         return mask

     def _decode(self, x):
         """
         :param torch.FloatTensor x: [batch_size, max_len, tag_size]
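For orientation, a minimal usage sketch of SeqLabeling as its (reformatted) docstrings describe it. This is not part of the commit; the fastNLP.models import path, the toy sizes, and the random tensors below are assumptions made for illustration.

import torch
from fastNLP.models import SeqLabeling

# A (vocab_size, embed_dim) tuple lets the model build its own Embedding, per the docstring above.
model = SeqLabeling(init_embed=(100, 50), hidden_size=64, num_classes=5)

words = torch.randint(0, 100, (4, 10))               # [batch_size, max_len] token indices (toy data)
seq_len = torch.full((4,), 10, dtype=torch.long)     # [batch_size, ] true sequence lengths
target = torch.randint(0, 5, (4, 10))                # [batch_size, max_len] gold tag ids

loss = model(words, seq_len, target)['loss']         # forward() returns {'loss': scalar CRF loss}
pred = model.predict(words, seq_len)['pred']         # predict() returns {'pred': [batch_size, max_len]}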
@@ -102,40 +101,40 @@ class AdvSeqLabel(nn.Module):
 class AdvSeqLabel(nn.Module):
     """
     A more sophisticated sequence labelling model. The structure is Embedding, LayerNorm, bidirectional LSTM (two layers), FC, LayerNorm, Dropout, FC, CRF.
+    :param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray init_embed: the Embedding specification (for a tuple(int, int),
+        the first int is the vocab size and the second the embedding dim); a Tensor, Embedding or ndarray is used directly to initialize the Embedding
+    :param int hidden_size: size of the LSTM hidden layer
+    :param int num_classes: number of classes
+    :param float dropout: dropout probability used inside the LSTM and in the Dropout layer
+    :param dict id2words: mapping from tag id to tag word, used during CRF decoding to rule out illegal tag sequences; for example,
+        in the 'BMES' scheme 'S' must not appear right after 'B'. Tags of the form 'B-NN' are also supported, where the part before '-'
+        gives the tag type and the part after it the concrete label; in that case 'B-NN' may be followed neither by 'S-NN' nor by any
+        'M-xx' other than 'M-NN' (or 'E-NN').
+    :param str encoding_type: one of "BIO", "BMES", "BEMSO"; only used when id2words is not None.
     """

     def __init__(self, init_embed, hidden_size, num_classes, dropout=0.3, id2words=None, encoding_type='bmes'):
-        """
-
-        :param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray init_embed: the Embedding specification (for a tuple(int, int),
-            the first int is the vocab size and the second the embedding dim); a Tensor, Embedding or ndarray is used directly to initialize the Embedding
-        :param int hidden_size: size of the LSTM hidden layer
-        :param int num_classes: number of classes
-        :param float dropout: dropout probability used inside the LSTM and in the Dropout layer
-        :param dict id2words: mapping from tag id to tag word, used during CRF decoding to rule out illegal tag sequences; for example,
-            in the 'BMES' scheme 'S' must not appear right after 'B'. Tags of the form 'B-NN' are also supported, where the part before '-'
-            gives the tag type and the part after it the concrete label; in that case 'B-NN' may be followed neither by 'S-NN' nor by any
-            'M-xx' other than 'M-NN' (or 'E-NN').
-        :param str encoding_type: one of "BIO", "BMES", "BEMSO"; only used when id2words is not None.
-        """
         super().__init__()
         self.Embedding = encoder.embedding.Embedding(init_embed)
         self.norm1 = torch.nn.LayerNorm(self.Embedding.embedding_dim)
-        self.Rnn = encoder.LSTM(input_size=self.Embedding.embedding_dim, hidden_size=hidden_size, num_layers=2, dropout=dropout,
-                                bidirectional=True, batch_first=True)
+        self.Rnn = encoder.LSTM(input_size=self.Embedding.embedding_dim, hidden_size=hidden_size, num_layers=2,
+                                dropout=dropout,
+                                bidirectional=True, batch_first=True)
         self.Linear1 = nn.Linear(hidden_size * 2, hidden_size * 2 // 3)
         self.norm2 = torch.nn.LayerNorm(hidden_size * 2 // 3)
         self.relu = torch.nn.LeakyReLU()
         self.drop = torch.nn.Dropout(dropout)
         self.Linear2 = nn.Linear(hidden_size * 2 // 3, num_classes)
         if id2words is None:
             self.Crf = decoder.CRF.ConditionalRandomField(num_classes, include_start_end_trans=False)
         else:
             self.Crf = decoder.CRF.ConditionalRandomField(num_classes, include_start_end_trans=False,
                                                           allowed_transitions=allowed_transitions(id2words,
-                                                                                                   encoding_type=encoding_type))
+                                                                                                  encoding_type=encoding_type))

     def _decode(self, x):
         """
         :param torch.FloatTensor x: [batch_size, max_len, tag_size]
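The id2words/encoding_type pair documented above is what feeds allowed_transitions() when the CRF is built, so Viterbi decoding can never emit an illegal tag sequence. A hypothetical construction, not taken from this commit; the four-tag BMES vocabulary and the sizes are made up for illustration:

from fastNLP.models import AdvSeqLabel

# Made-up BMES tag vocabulary: with id2words given, the CRF is constructed with
# allowed_transitions(...), so illegal pairs such as 'B' followed directly by 'S' are pruned at decode time.
id2words = {0: 'B', 1: 'M', 2: 'E', 3: 'S'}
model = AdvSeqLabel(init_embed=(100, 50), hidden_size=64, num_classes=4,
                    dropout=0.3, id2words=id2words, encoding_type='bmes')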
@@ -143,7 +142,7 @@ class AdvSeqLabel(nn.Module):
         """
         tag_seq, _ = self.Crf.viterbi_decode(x, self.mask)
         return tag_seq

     def _internal_loss(self, x, y):
         """
         Negative log likelihood loss.
@@ -158,14 +157,14 @@ class AdvSeqLabel(nn.Module):
         assert y.shape == self.mask.shape
         total_loss = self.Crf(x, y, self.mask)
         return torch.mean(total_loss)

     def _make_mask(self, x, seq_len):
         batch_size, max_len = x.size(0), x.size(1)
         mask = seq_mask(seq_len, max_len)
         mask = mask.view(batch_size, max_len)
         mask = mask.to(x).float()
         return mask

     def _forward(self, words, seq_len, target=None):
         """
         :param torch.LongTensor words: [batch_size, max_len]
@@ -174,24 +173,24 @@ class AdvSeqLabel(nn.Module):
         :return y: If truth is None, return list of [decode path(list)]. Used in testing and predicting.
                    If truth is not None, return loss, a scalar. Used in training.
         """
         words = words.long()
         seq_len = seq_len.long()
         self.mask = self._make_mask(words, seq_len)

         # seq_len = seq_len.long()
         target = target.long() if target is not None else None

         if next(self.parameters()).is_cuda:
             words = words.cuda()
             self.mask = self.mask.cuda()

         x = self.Embedding(words)
         x = self.norm1(x)
         # [batch_size, max_len, word_emb_dim]
         x, _ = self.Rnn(x, seq_len=seq_len)

         x = self.Linear1(x)
         x = self.norm2(x)
         x = self.relu(x)
@@ -201,22 +200,22 @@ class AdvSeqLabel(nn.Module):
             return {"loss": self._internal_loss(x, target)}
         else:
             return {"pred": self._decode(x)}

     def forward(self, words, seq_len, target):
         """
         :param torch.LongTensor words: [batch_size, max_len]
-        :param torch.LongTensor seq_len:[batch_size, ]
+        :param torch.LongTensor seq_len: [batch_size, ]
         :param torch.LongTensor target: [batch_size, max_len], the target tags
-        :return torch.Tensor, a scalar loss
+        :return torch.Tensor: a scalar loss
         """
         return self._forward(words, seq_len, target)

     def predict(self, words, seq_len):
         """
         :param torch.LongTensor words: [batch_size, max_len]
-        :param torch.LongTensor seq_len:[batch_size, ]
-        :return {'pred':}, the value is a torch.LongTensor of shape [batch_size, max_len]
+        :param torch.LongTensor seq_len: [batch_size, ]
+        :return torch.LongTensor: [batch_size, max_len]
         """
         return self._forward(words, seq_len)
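And a corresponding minimal sketch for the AdvSeqLabel train/predict interfaces documented above. Again illustrative only and not part of the commit; the shapes, hyperparameters, and the fastNLP.models import path are assumptions.

import torch
from fastNLP.models import AdvSeqLabel

model = AdvSeqLabel(init_embed=(100, 50), hidden_size=64, num_classes=5, dropout=0.3)

words = torch.randint(0, 100, (4, 10))               # [batch_size, max_len] token indices (toy data)
seq_len = torch.full((4,), 10, dtype=torch.long)     # [batch_size, ] true sequence lengths
target = torch.randint(0, 5, (4, 10))                # [batch_size, max_len] gold tag ids

train_out = model(words, seq_len, target)            # forward() -> {'loss': scalar CRF loss}
test_out = model.predict(words, seq_len)             # predict() -> {'pred': [batch_size, max_len] tag ids}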
