|
|
import torch

# Reconstructed imports for names used below (BaseModel, encoder, decoder,
# allowed_transitions, seq_mask); module paths assume the surrounding package layout.
from .base_model import BaseModel
from ..modules import decoder, encoder
from ..modules.decoder.CRF import allowed_transitions
from ..modules.utils import seq_mask
|
|
|
from ..core.const import Const as C |
|
|
|
from torch import nn |
|
|
|
|
|
|
|
|
|
|
|
class SeqLabeling(BaseModel): |
|
|
|
""" |
|
|
|
一个基础的Sequence labeling的模型 |
|
|
|
一个基础的Sequence labeling的模型。 |
|
|
|
用于做sequence labeling的基础类。结构包含一层Embedding,一层LSTM(单向,一层),一层FC,以及一层CRF。 |
|
|
|
|
|
|
|
:param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray init_embed: Embedding的大小(传入tuple(int, int), |
|
|
|
第一个int为vocab_zie, 第二个int为embed_dim); 如果为Tensor, Embedding, ndarray等则直接使用该值初始化Embedding |
|
|
|
:param int hidden_size: LSTM隐藏层的大小 |
|
|
|
:param int num_classes: 一共有多少类 |
|
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, init_embed, hidden_size, num_classes): |
|
|
|
""" |
|
|
|
用于做sequence labeling的基础类。结构包含一层Embedding,一层LSTM(单向,一层),一层FC,以及一层CRF。 |
|
|
|
|
|
|
|
:param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray init_embed: Embedding的大小(传入tuple(int, int), |
|
|
|
第一个int为vocab_zie, 第二个int为embed_dim); 如果为Tensor, Embedding, ndarray等则直接使用该值初始化Embedding |
|
|
|
:param int hidden_size: LSTM隐藏层的大小 |
|
|
|
:param int num_classes: 一共有多少类 |
|
|
|
""" |
|
|
|
super(SeqLabeling, self).__init__() |
|
|
|
|
|
|
|
|
|
|
|
self.Embedding = encoder.embedding.Embedding(init_embed) |
|
|
|
self.Rnn = encoder.lstm.LSTM(self.Embedding.embedding_dim, hidden_size) |
|
|
|
self.Linear = nn.Linear(hidden_size, num_classes) |
|
|
|
self.Crf = decoder.CRF.ConditionalRandomField(num_classes) |
|
|
|
self.mask = None |
|
|
|
|
|
|
|
|
|
|
|
def forward(self, words, seq_len, target): |
|
|
|
""" |
|
|
|
        :param torch.LongTensor words: [batch_size, max_len], indices of the sequence
        :param torch.LongTensor seq_len: [batch_size, ], lengths of the sequences
        :param torch.LongTensor target: [batch_size, max_len], target of the sequences
        :return dict: {C.LOSS: a scalar loss}
        """
|
|
|
assert words.shape[0] == seq_len.shape[0] |
|
|
|
assert target.shape == words.shape |
|
|
|
self.mask = self._make_mask(words, seq_len) |
|
|
|
|
|
|
|
|
|
|
|
x = self.Embedding(words) |
|
|
|
# [batch_size, max_len, word_emb_dim] |
|
|
|
|
|
|
x, _ = self.Rnn(x, seq_len) |
|
|
|
# [batch_size, max_len, hidden_size * direction] |
|
|
|
x = self.Linear(x) |
|
|
|
# [batch_size, max_len, num_classes] |
|
|
|
return {C.LOSS: self._internal_loss(x, target)} |
|
|
|
|
|
|
|
|
|
|
|
def predict(self, words, seq_len): |
|
|
|
""" |
|
|
|
用于在预测时使用 |
|
|
@@ -58,7 +57,7 @@ class SeqLabeling(BaseModel): |
|
|
|
:return: {'pred': xx}, [batch_size, max_len] |
|
|
|
""" |
|
|
|
self.mask = self._make_mask(words, seq_len) |
|
|
|
|
|
|
|
|
|
|
|
x = self.Embedding(words) |
|
|
|
# [batch_size, max_len, word_emb_dim] |
|
|
|
x, _ = self.Rnn(x, seq_len) |
|
|
        # [batch_size, max_len, hidden_size * direction]
        x = self.Linear(x)
|
|
|
# [batch_size, max_len, num_classes] |
|
|
|
pred = self._decode(x) |
|
|
|
return {C.OUTPUT: pred} |
|
|
|
|
|
|
|
|
|
|
|
def _internal_loss(self, x, y): |
|
|
|
""" |
|
|
|
Negative log likelihood loss. |
|
|
@@ -82,14 +81,14 @@ class SeqLabeling(BaseModel): |
|
|
|
assert y.shape == self.mask.shape |
|
|
|
total_loss = self.Crf(x, y, self.mask) |
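        # Note: the ConditionalRandomField forward is assumed to return the per-sequence
        # negative log likelihood (masked over padding), so the batch loss is its mean.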
|
|
|
return torch.mean(total_loss) |
|
|
|
|
|
|
|
|
|
|
|
def _make_mask(self, x, seq_len): |
|
|
|
batch_size, max_len = x.size(0), x.size(1) |
|
|
|
mask = seq_mask(seq_len, max_len) |
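        # Sketch of the assumed seq_mask semantics (from ..modules.utils): for
        # seq_len = [3, 1] and max_len = 4 the mask is
        #   [[1., 1., 1., 0.],
        #    [1., 0., 0., 0.]]
        # so padded positions contribute nothing to the CRF loss or to decoding.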
|
|
|
mask = mask.view(batch_size, max_len) |
|
|
|
mask = mask.to(x).float() |
|
|
|
return mask |
|
|
|
|
|
|
|
|
|
|
|
def _decode(self, x): |
|
|
|
""" |
|
|
|
:param torch.FloatTensor x: [batch_size, max_len, tag_size] |
|
|
        :return torch.LongTensor: [batch_size, max_len]
        """
        tag_seq, _ = self.Crf.viterbi_decode(x, self.mask)
        return tag_seq
|
|
|
class AdvSeqLabel(nn.Module): |
|
|
|
""" |
|
|
|
更复杂的Sequence Labelling模型。结构为Embedding, LayerNorm, 双向LSTM(两层),FC,LayerNorm,DropOut,FC,CRF。 |
|
|
|
|
|
|
|
:param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray init_embed: Embedding的大小(传入tuple(int, int), |
|
|
|
第一个int为vocab_zie, 第二个int为embed_dim); 如果为Tensor, Embedding, ndarray等则直接使用该值初始化Embedding |
|
|
|
:param int hidden_size: LSTM的隐层大小 |
|
|
|
:param int num_classes: 有多少个类 |
|
|
|
:param float dropout: LSTM中以及DropOut层的drop概率 |
|
|
|
:param dict id2words: tag id转为其tag word的表。用于在CRF解码时防止解出非法的顺序,比如'BMES'这个标签规范中,'S' |
|
|
|
不能出现在'B'之后。这里也支持类似与'B-NN',即'-'前为标签类型的指示,后面为具体的tag的情况。这里不但会保证 |
|
|
|
'B-NN'后面不为'S-NN'还会保证'B-NN'后面不会出现'M-xx'(任何非'M-NN'和'E-NN'的情况。) |
|
|
|
:param str encoding_type: 支持"BIO", "BMES", "BEMSO", 只有在id2words不为None的情况有用。 |
|
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, init_embed, hidden_size, num_classes, dropout=0.3, id2words=None, encoding_type='bmes'): |
|
|
|
""" |
|
|
|
|
|
|
|
:param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray init_embed: Embedding的大小(传入tuple(int, int), |
|
|
|
第一个int为vocab_zie, 第二个int为embed_dim); 如果为Tensor, Embedding, ndarray等则直接使用该值初始化Embedding |
|
|
|
:param int hidden_size: LSTM的隐层大小 |
|
|
|
:param int num_classes: 有多少个类 |
|
|
|
:param float dropout: LSTM中以及DropOut层的drop概率 |
|
|
|
:param dict id2words: tag id转为其tag word的表。用于在CRF解码时防止解出非法的顺序,比如'BMES'这个标签规范中,'S' |
|
|
|
不能出现在'B'之后。这里也支持类似与'B-NN',即'-'前为标签类型的指示,后面为具体的tag的情况。这里不但会保证 |
|
|
|
'B-NN'后面不为'S-NN'还会保证'B-NN'后面不会出现'M-xx'(任何非'M-NN'和'E-NN'的情况。) |
|
|
|
:param str encoding_type: 支持"BIO", "BMES", "BEMSO", 只有在id2words不为None的情况游泳。 |
|
|
|
""" |
|
|
|
|
|
|
|
super().__init__() |
|
|
|
|
|
|
|
|
|
|
|
self.Embedding = encoder.embedding.Embedding(init_embed) |
|
|
|
self.norm1 = torch.nn.LayerNorm(self.Embedding.embedding_dim) |
|
|
|
        self.Rnn = encoder.LSTM(input_size=self.Embedding.embedding_dim, hidden_size=hidden_size, num_layers=2,
                                dropout=dropout,
                                bidirectional=True, batch_first=True)
|
|
|
self.Linear1 = nn.Linear(hidden_size * 2, hidden_size * 2 // 3) |
|
|
|
self.norm2 = torch.nn.LayerNorm(hidden_size * 2 // 3) |
|
|
|
self.relu = torch.nn.LeakyReLU() |
|
|
|
self.drop = torch.nn.Dropout(dropout) |
|
|
|
self.Linear2 = nn.Linear(hidden_size * 2 // 3, num_classes) |
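        # Shape flow through the stack above (hidden_size * 2 because the LSTM is
        # bidirectional):
        #   Embedding [B, L, embed_dim] -> LSTM [B, L, hidden_size * 2]
        #   -> Linear1 [B, L, hidden_size * 2 // 3] -> Linear2 [B, L, num_classes]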
|
|
|
|
|
|
|
|
|
|
|
if id2words is None: |
|
|
|
self.Crf = decoder.CRF.ConditionalRandomField(num_classes, include_start_end_trans=False) |
|
|
|
else: |
|
|
|
            self.Crf = decoder.CRF.ConditionalRandomField(num_classes, include_start_end_trans=False,
                                                          allowed_transitions=allowed_transitions(
                                                              id2words, encoding_type=encoding_type))
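        # Sketch of the constrained-CRF setup (hypothetical tag vocabulary):
        #
        #   id2words = {0: 'B-NN', 1: 'M-NN', 2: 'E-NN', 3: 'S-NN'}
        #   model = AdvSeqLabel((1000, 100), hidden_size=128, num_classes=4,
        #                       id2words=id2words, encoding_type='bmes')
        #
        # allowed_transitions(id2words, encoding_type='bmes') yields the legal
        # (from_tag, to_tag) pairs, so Viterbi decoding can never emit an illegal
        # transition such as 'B-NN' -> 'S-NN'.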
|
|
|
|
|
|
|
def _decode(self, x): |
|
|
|
""" |
|
|
|
:param torch.FloatTensor x: [batch_size, max_len, tag_size] |
|
|
        :return torch.LongTensor: [batch_size, max_len]
|
|
|
""" |
|
|
|
tag_seq, _ = self.Crf.viterbi_decode(x, self.mask) |
|
|
|
return tag_seq |
|
|
|
|
|
|
|
|
|
|
|
def _internal_loss(self, x, y): |
|
|
|
""" |
|
|
|
Negative log likelihood loss. |
|
|
@@ -158,14 +157,14 @@ class AdvSeqLabel(nn.Module): |
|
|
|
assert y.shape == self.mask.shape |
|
|
|
total_loss = self.Crf(x, y, self.mask) |
|
|
|
return torch.mean(total_loss) |
|
|
|
|
|
|
|
|
|
|
|
def _make_mask(self, x, seq_len): |
|
|
|
batch_size, max_len = x.size(0), x.size(1) |
|
|
|
mask = seq_mask(seq_len, max_len) |
|
|
|
mask = mask.view(batch_size, max_len) |
|
|
|
mask = mask.to(x).float() |
|
|
|
return mask |
|
|
|
|
|
|
|
|
|
|
|
def _forward(self, words, seq_len, target=None): |
|
|
|
""" |
|
|
|
:param torch.LongTensor words: [batch_size, mex_len] |
|
|
@@ -174,24 +173,24 @@ class AdvSeqLabel(nn.Module): |
|
|
|
        :return dict: {"loss": a scalar loss} if target is not None (training);
            {"pred": decoded tag paths} if target is None (testing and predicting)
|
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
words = words.long() |
|
|
|
seq_len = seq_len.long() |
|
|
|
self.mask = self._make_mask(words, seq_len) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
target = target.long() if target is not None else None |
|
|
|
|
|
|
|
|
|
|
|
if next(self.parameters()).is_cuda: |
|
|
|
words = words.cuda() |
|
|
|
self.mask = self.mask.cuda() |
|
|
|
|
|
|
|
|
|
|
|
x = self.Embedding(words) |
|
|
|
x = self.norm1(x) |
|
|
|
# [batch_size, max_len, word_emb_dim] |
|
|
|
|
|
|
|
|
|
|
|
x, _ = self.Rnn(x, seq_len=seq_len) |
|
|
|
|
|
|
|
|
|
|
|
x = self.Linear1(x) |
|
|
|
x = self.norm2(x) |
|
|
|
x = self.relu(x) |
|
|
        x = self.drop(x)
        x = self.Linear2(x)
        # [batch_size, max_len, num_classes]
        if target is not None:
|
|
|
return {"loss": self._internal_loss(x, target)} |
|
|
|
else: |
|
|
|
return {"pred": self._decode(x)} |
|
|
|
|
|
|
|
|
|
|
|
def forward(self, words, seq_len, target): |
|
|
|
""" |
|
|
|
|
|
|
|
:param torch.LongTensor words: [batch_size, mex_len] |
|
|
|
:param torch.LongTensor seq_len:[batch_size, ] |
|
|
|
:param torch.LongTensor seq_len: [batch_size, ] |
|
|
|
:param torch.LongTensor target: [batch_size, max_len], 目标 |
|
|
|
:return torch.Tensor, a scalar loss |
|
|
|
:return torch.Tensor: a scalar loss |
|
|
|
""" |
|
|
|
return self._forward(words, seq_len, target) |
|
|
|
|
|
|
|
|
|
|
|
def predict(self, words, seq_len): |
|
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
:param torch.LongTensor words: [batch_size, mex_len] |
|
|
|
:param torch.LongTensor seq_len:[batch_size, ] |
|
|
|
:return {'pred':}, value是torch.LongTensor, [batch_size, max_len] |
|
|
|
|
|
|
|
:param torch.LongTensor seq_len: [batch_size, ] |
|
|
|
:return torch.LongTensor: [batch_size, max_len] |
|
|
|
""" |
|
|
|
return self._forward(words, seq_len) |
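# A minimal end-to-end sketch for AdvSeqLabel (hypothetical sizes; the dict keys
# 'loss' and 'pred' follow _forward above):
#
#   import torch
#   model = AdvSeqLabel(init_embed=(1000, 100), hidden_size=128, num_classes=4)
#   words = torch.randint(0, 1000, (2, 7))        # [batch_size, max_len]
#   seq_len = torch.tensor([7, 5])                # [batch_size, ]
#   target = torch.randint(0, 4, (2, 7))          # [batch_size, max_len]
#   loss = model(words, seq_len, target)['loss']  # training: scalar CRF loss
#   pred = model.predict(words, seq_len)['pred']  # prediction: [2, 7] tag ids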