@@ -226,7 +226,10 @@ class LabelBilinear(nn.Module):
         return output

 class BiaffineParser(GraphParser):
-    """Implementation of the Biaffine Dependency Parser.
+    """
+    Alias: :class:`fastNLP.models.BiaffineParser` :class:`fastNLP.models.biaffine_parser.BiaffineParser`
+
+    Implementation of the Biaffine Dependency Parser.

     Paper: `Deep Biaffine Attention for Neural Dependency Parsing (Dozat and Manning, 2016)
     <https://arxiv.org/abs/1611.01734>`_ .
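
# --- Editor's sketch (not part of the diff): biaffine arc scoring ---
# A minimal rendition of the biaffine attention in Dozat & Manning (2016):
# score(i, j) = h_i^T U h_j + W [h_i; h_j] + b. Shapes and names here are
# illustrative assumptions, not fastNLP's actual implementation.
import torch
import torch.nn as nn

class BiaffineScorer(nn.Module):
    def __init__(self, hidden):
        super().__init__()
        self.U = nn.Parameter(torch.randn(hidden, hidden) * 0.01)
        self.W = nn.Parameter(torch.zeros(2 * hidden))
        self.b = nn.Parameter(torch.zeros(1))

    def forward(self, head, dep):
        # head, dep: [batch, seq_len, hidden] -> scores: [batch, seq_len, seq_len]
        h = head.size(-1)
        bilinear = torch.einsum('bih,hg,bjg->bij', head, self.U, dep)
        linear = (head @ self.W[:h]).unsqueeze(2) + (dep @ self.W[h:]).unsqueeze(1)
        return bilinear + linear + self.b
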
@@ -456,6 +459,8 @@ class BiaffineParser(GraphParser):

 class ParserLoss(LossFunc):
     """
+    Alias: :class:`fastNLP.models.ParserLoss` :class:`fastNLP.models.biaffine_parser.ParserLoss`
+
     Computes the parser loss.

     :param pred1: [batch_size, seq_len, seq_len] edge prediction logits
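
# --- Editor's sketch (not part of the diff): one plausible edge loss ---
# Treating row i of the [batch, seq_len, seq_len] logits as a distribution
# over head positions for token i, a masked cross-entropy gives a parser
# loss. Illustrative only; not necessarily ParserLoss's exact formulation.
import torch.nn.functional as F

def arc_loss(edge_logits, gold_heads, mask):
    # edge_logits: [batch, seq_len, seq_len]; gold_heads, mask: [batch, seq_len]
    b, n, _ = edge_logits.shape
    per_tok = F.cross_entropy(edge_logits.reshape(b * n, n),
                              gold_heads.reshape(b * n), reduction='none')
    m = mask.reshape(b * n).float()
    return (per_tok * m).sum() / m.sum()
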
@@ -478,6 +483,8 @@ class ParserLoss(LossFunc):

 class ParserMetric(MetricBase):
     """
+    Alias: :class:`fastNLP.models.ParserMetric` :class:`fastNLP.models.biaffine_parser.ParserMetric`
+
     Evaluates the parser's performance.

     :param pred1: edge prediction logits
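
# --- Editor's sketch (not part of the diff): unlabeled attachment score ---
# A typical dependency-parsing metric: the fraction of real (non-padding)
# tokens whose predicted head equals the gold head. Names are illustrative.
def uas(pred_heads, gold_heads, mask):
    # all tensors: [batch, seq_len]; mask is 1 for real tokens, 0 for padding
    correct = ((pred_heads == gold_heads) & mask.bool()).sum().item()
    return correct / mask.sum().item()
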
@@ -10,6 +10,8 @@ from ..modules import encoder

 class CNNText(torch.nn.Module):
     """
+    Alias: :class:`fastNLP.models.CNNText` :class:`fastNLP.models.cnn_text_classification.CNNText`
+
     A model that classifies text with a CNN.

     'Yoon Kim. 2014. Convolutional Neural Networks for Sentence Classification.'
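
# --- Editor's sketch (not part of the diff): a Kim-2014-style text CNN ---
# Minimal convolution + max-over-time pooling classifier in plain PyTorch;
# the hyper-parameters are assumptions, not CNNText's actual defaults.
import torch
import torch.nn as nn

class TextCNN(nn.Module):
    def __init__(self, vocab_size, emb_dim, num_classes,
                 kernel_sizes=(3, 4, 5), channels=100):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, emb_dim)
        self.convs = nn.ModuleList(
            nn.Conv1d(emb_dim, channels, k, padding=k - 1) for k in kernel_sizes)
        self.fc = nn.Linear(channels * len(kernel_sizes), num_classes)

    def forward(self, words):                      # words: [batch, seq_len]
        x = self.embed(words).transpose(1, 2)      # [batch, emb_dim, seq_len]
        pooled = [conv(x).relu().max(dim=-1).values for conv in self.convs]
        return self.fc(torch.cat(pooled, dim=-1))  # [batch, num_classes]
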
@@ -10,6 +10,8 @@ from torch import nn

 class SeqLabeling(BaseModel):
     """
+    Alias: :class:`fastNLP.models.SeqLabeling` :class:`fastNLP.models.sequence_labeling.SeqLabeling`
+
     A basic sequence labeling model.

     Base class for sequence labeling: an Embedding layer, a single-layer unidirectional LSTM, an FC layer, and a CRF layer.
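
# --- Editor's sketch (not part of the diff): the SeqLabeling layout ---
# The docstring above describes Embedding -> LSTM -> FC -> CRF. A bare-bones
# version of the first three stages, producing per-token emission scores that
# a CRF layer would consume; names are illustrative, not fastNLP's.
import torch.nn as nn

class TinyTagger(nn.Module):
    def __init__(self, vocab_size, emb_dim, hidden, num_tags):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, emb_dim)
        self.lstm = nn.LSTM(emb_dim, hidden, batch_first=True)
        self.fc = nn.Linear(hidden, num_tags)

    def forward(self, words):   # [batch, seq_len] -> [batch, seq_len, num_tags]
        out, _ = self.lstm(self.embed(words))
        return self.fc(out)     # emission scores for a downstream CRF
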
@@ -100,6 +102,8 @@ class SeqLabeling(BaseModel):

 class AdvSeqLabel(nn.Module):
     """
+    Alias: :class:`fastNLP.models.AdvSeqLabel` :class:`fastNLP.models.sequence_labeling.AdvSeqLabel`
+
     A more sophisticated sequence labeling model: Embedding, LayerNorm, bidirectional LSTM (two layers), FC, LayerNorm, Dropout, FC, CRF.

     :param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray init_embed: size of the Embedding (when a tuple(int, int) is passed,
@@ -14,8 +14,9 @@ my_inf = 10e12

 class ESIM(BaseModel):
     """
+    Alias: :class:`fastNLP.models.ESIM` :class:`fastNLP.models.snli.ESIM`
     A PyTorch implementation of the ESIM model.
     ESIM paper: Enhanced LSTM for Natural Language Inference (arXiv: 1609.06038)

     :param int vocab_size: vocabulary size
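
# --- Editor's sketch (not part of the diff): ESIM's 4x feature widths ---
# ESIM concatenates a representation with its attended counterpart plus their
# difference and product, [a; a~; a - a~; a * a~], and later concatenates the
# avg- and max-pooled vectors of both sentences. Both steps quadruple the
# width, which is where sizes like hidden_size * 4 (next hunk) come from.
import torch

def enhance(a, a_tilde):
    # a, a_tilde: [batch, seq_len, hidden] -> [batch, seq_len, hidden * 4]
    return torch.cat([a, a_tilde, a - a_tilde, a * a_tilde], dim=-1)
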
@@ -49,7 +50,7 @@ class ESIM(BaseModel):
         )
         self.bi_attention = Aggregator.BiAttention()
-        self.mean_pooling = Aggregator.MeanPoolWithMask()
+        self.mean_pooling = Aggregator.AvgPoolWithMask()
         self.max_pooling = Aggregator.MaxPoolWithMask()

         self.inference_layer = nn.Linear(self.hidden_size * 4, self.hidden_size)
@@ -11,6 +11,8 @@ from torch import nn

 class StarTransEnc(nn.Module):
     """
+    Alias: :class:`fastNLP.models.StarTransEnc` :class:`fastNLP.models.star_transformer.StarTransEnc`
+
     Star-Transformer encoder with word embedding.

     :param init_embed: the word embedding; may be a tuple (num_embeddings, embedding_dim), i.e.
@@ -93,7 +95,10 @@ class _NLICls(nn.Module):
         return h

 class STSeqLabel(nn.Module):
-    """Star-Transformer model for sequence labeling
+    """
+    Alias: :class:`fastNLP.models.STSeqLabel` :class:`fastNLP.models.star_transformer.STSeqLabel`
+
+    Star-Transformer model for sequence labeling

     :param vocab_size: vocabulary size of the word embedding
     :param emb_dim: feature dimension of each word embedding
@@ -153,7 +158,10 @@ class STSeqLabel(nn.Module):

 class STSeqCls(nn.Module):
-    """Star-Transformer for classification tasks
+    """
+    Alias: :class:`fastNLP.models.STSeqCls` :class:`fastNLP.models.star_transformer.STSeqCls`
+
+    Star-Transformer for classification tasks

     :param vocab_size: vocabulary size of the word embedding
     :param emb_dim: feature dimension of each word embedding
@@ -214,7 +222,10 @@ class STSeqCls(nn.Module):

 class STNLICls(nn.Module):
-    """Star-Transformer for natural language inference (NLI)
+    """
+    Alias: :class:`fastNLP.models.STNLICls` :class:`fastNLP.models.star_transformer.STNLICls`
+
+    Star-Transformer for natural language inference (NLI)

     :param vocab_size: vocabulary size of the word embedding
     :param emb_dim: feature dimension of each word embedding
@@ -1,7 +1,7 @@
 from .pooling import MaxPool
 from .pooling import MaxPoolWithMask
 from .pooling import AvgPool
-from .pooling import MeanPoolWithMask
+from .pooling import AvgPoolWithMask
 from .attention import MultiHeadAttention, BiAttention

 __all__ = [
@@ -5,6 +5,8 @@ import torch.nn as nn

 class MaxPool(nn.Module):
     """
+    Alias: :class:`fastNLP.modules.aggregator.MaxPool` :class:`fastNLP.modules.aggregator.pooling.MaxPool`
+
     Max-pooling module.

     :param stride: step size of the window; defaults to kernel_size
@@ -12,11 +14,9 @@ class MaxPool(nn.Module):
     :param dilation: spacing between elements inside the window
     :param dimension: dimensionality of the max pooling, supporting 1, 2 and 3
     :param kernel_size: window size of the max pooling; defaults to the last k dimensions of the tensor, where k is dimension
-    :param return_indices:
     :param ceil_mode:
     """

-    def __init__(self, stride=None, padding=0, dilation=1, dimension=1, kernel_size=None,
-                 return_indices=False, ceil_mode=False):
+    def __init__(self, stride=None, padding=0, dilation=1, dimension=1, kernel_size=None, ceil_mode=False):
         super(MaxPool, self).__init__()
         assert (1 <= dimension) and (dimension <= 3)
@@ -25,7 +25,6 @@ class MaxPool(nn.Module):
         self.padding = padding
         self.dilation = dilation
         self.kernel_size = kernel_size
-        self.return_indices = return_indices
         self.ceil_mode = ceil_mode

     def forward(self, x):
@@ -33,27 +32,31 @@ class MaxPool(nn.Module):
             pooling = nn.MaxPool1d(
                 stride=self.stride, padding=self.padding, dilation=self.dilation,
                 kernel_size=self.kernel_size if self.kernel_size is not None else x.size(-1),
-                return_indices=self.return_indices, ceil_mode=self.ceil_mode
+                return_indices=False, ceil_mode=self.ceil_mode
             )
             x = torch.transpose(x, 1, 2)  # [N,L,C] -> [N,C,L]
         elif self.dimension == 2:
             pooling = nn.MaxPool2d(
                 stride=self.stride, padding=self.padding, dilation=self.dilation,
                 kernel_size=self.kernel_size if self.kernel_size is not None else (x.size(-2), x.size(-1)),
-                return_indices=self.return_indices, ceil_mode=self.ceil_mode
+                return_indices=False, ceil_mode=self.ceil_mode
             )
         else:
             pooling = nn.MaxPool3d(
                 stride=self.stride, padding=self.padding, dilation=self.dilation,
                 kernel_size=self.kernel_size if self.kernel_size is not None else (x.size(-3), x.size(-2), x.size(-1)),
-                return_indices=self.return_indices, ceil_mode=self.ceil_mode
+                return_indices=False, ceil_mode=self.ceil_mode
             )
         x = pooling(x)
         return x.squeeze(dim=-1)  # [N,C,1] -> [N,C]

 class MaxPoolWithMask(nn.Module):
-    """1-d max pooling with a mask matrix"""
+    """
+    Alias: :class:`fastNLP.modules.aggregator.MaxPoolWithMask` :class:`fastNLP.modules.aggregator.pooling.MaxPoolWithMask`
+
+    Max pooling with a mask matrix; positions whose mask value is 0 are ignored during max pooling.
+    """

     def __init__(self):
         super(MaxPoolWithMask, self).__init__()
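
# --- Editor's sketch (not part of the diff): max pooling that respects a mask ---
# Padding positions are pushed to -inf before the max, so they can never win;
# this mirrors the behaviour the MaxPoolWithMask docstring describes.
import torch

def masked_max_pool(tensor, mask):
    # tensor: [batch, seq_len, hidden]; mask: [batch, seq_len], 1 = real token
    filled = tensor.masked_fill(mask.unsqueeze(-1).eq(0), float('-inf'))
    return filled.max(dim=1).values  # [batch, hidden]
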
@@ -89,7 +92,11 @@ class KMaxPool(nn.Module):

 class AvgPool(nn.Module):
-    """1-d average pooling module."""
+    """
+    Alias: :class:`fastNLP.modules.aggregator.AvgPool` :class:`fastNLP.modules.aggregator.pooling.AvgPool`
+
+    Given input shaped [batch_size, max_len, hidden_size], average-pools over the max_len dimension; output is [batch_size, hidden_size].
+    """

     def __init__(self, stride=None, padding=0):
         super(AvgPool, self).__init__()
@@ -111,10 +118,16 @@ class AvgPool(nn.Module):
         return x.squeeze(dim=-1)

-class MeanPoolWithMask(nn.Module):
+class AvgPoolWithMask(nn.Module):
     """
+    Alias: :class:`fastNLP.modules.aggregator.AvgPoolWithMask` :class:`fastNLP.modules.aggregator.pooling.AvgPoolWithMask`
+
+    Given input shaped [batch_size, max_len, hidden_size], average-pools over the max_len dimension, counting only
+    positions where the mask is 1; output is [batch_size, hidden_size].
     """

     def __init__(self):
-        super(MeanPoolWithMask, self).__init__()
+        super(AvgPoolWithMask, self).__init__()
         self.inf = 10e12

     def forward(self, tensor, mask, dim=1):
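
# --- Editor's sketch (not part of the diff): mask-aware average pooling ---
# Sum only the unmasked positions and divide by the real length, which is
# the behaviour the AvgPoolWithMask docstring describes.
import torch

def masked_avg_pool(tensor, mask):
    # tensor: [batch, seq_len, hidden]; mask: [batch, seq_len], 1 = real token
    m = mask.unsqueeze(-1).float()  # [batch, seq_len, 1]
    return (tensor * m).sum(dim=1) / m.sum(dim=1).clamp(min=1e-12)
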
@@ -6,6 +6,8 @@ from ..utils import initial_parameter

 def allowed_transitions(id2target, encoding_type='bio', include_start_end=True):
     """
+    Alias: :func:`fastNLP.modules.decoder.allowed_transitions` :func:`fastNLP.modules.decoder.CRF.allowed_transitions`
+
     Given a mapping from id to label, returns the list of all allowed (from_tag_id, to_tag_id) transitions.

     :param dict id2target: keys are label indices, values are str tags or tag-labels. A value may be a bare tag such as "B" or "M", or
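
# --- Editor's sketch (not part of the diff): BIO transition constraints ---
# The essence of allowed_transitions for plain BIO tags: "I-X" may only
# follow "B-X" or "I-X" with the same label X. A simplified stand-alone check.
def bio_transition_allowed(from_tag, to_tag):
    if to_tag == 'O' or to_tag.startswith('B-'):
        return True  # O and B-* may follow anything
    if to_tag.startswith('I-'):
        return from_tag in ('B-' + to_tag[2:], 'I-' + to_tag[2:])
    return False

assert bio_transition_allowed('B-PER', 'I-PER')
assert not bio_transition_allowed('B-ORG', 'I-PER')
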
@@ -133,7 +135,10 @@ def _is_transition_allowed(encoding_type, from_tag, from_label, to_tag, to_label

 class ConditionalRandomField(nn.Module):
-    """Conditional random field.
+    """
+    Alias: :class:`fastNLP.modules.decoder.ConditionalRandomField` :class:`fastNLP.modules.decoder.CRF.ConditionalRandomField`
+
+    Conditional random field.
     Provides forward() and viterbi_decode(), used for training and inference respectively.

     :param int num_tags: number of tags
@@ -5,7 +5,10 @@ from ..utils import initial_parameter

 class MLP(nn.Module):
-    """Multilayer Perceptrons as a decoder
+    """
+    Alias: :class:`fastNLP.modules.MLP` :class:`fastNLP.modules.decoder.MLP.MLP`
+
+    Multilayer perceptron

     :param list size_layer: a list of ints defining the MLP's layers; each number is the hidden size of that layer. The MLP has len(size_layer) - 1 layers.
     :param str or list activation:
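
# --- Editor's sketch (not part of the diff): building an MLP from size_layer ---
# How a list like [input, hidden, ..., output] maps to len(size_layer) - 1
# linear layers; activation handling is simplified here to a single choice.
import torch.nn as nn

def build_mlp(size_layer, activation=nn.ReLU):
    layers = []
    for i in range(len(size_layer) - 1):
        layers.append(nn.Linear(size_layer[i], size_layer[i + 1]))
        if i < len(size_layer) - 2:  # no activation after the output layer
            layers.append(activation())
    return nn.Sequential(*layers)

mlp = build_mlp([256, 128, 10])  # 2 layers: 256 -> 128 -> 10
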
@@ -3,7 +3,10 @@ import torch

 def viterbi_decode(logits, transitions, mask=None, unpad=False):
-    """Given a feature matrix and a transition score matrix, computes the best path and its score
+    """
+    Alias: :func:`fastNLP.modules.decoder.viterbi_decode` :func:`fastNLP.modules.decoder.utils.viterbi_decode`
+
+    Given a feature matrix and a transition score matrix, computes the best path and its score.

     :param torch.FloatTensor logits: batch_size x max_len x num_tags, the feature matrix.
     :param torch.FloatTensor transitions: n_tags x n_tags; the value at [i, j] is treated as the score of transitioning from tag i to tag j.
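
# --- Editor's sketch (not part of the diff): Viterbi over one sequence ---
# A batch-free rendition of what viterbi_decode computes: the highest-scoring
# tag path under emission scores (logits) plus transition scores.
import torch

def viterbi(logits, transitions):
    # logits: [seq_len, n_tags]; transitions: [n_tags, n_tags] ([i, j] = i -> j)
    score = logits[0]  # [n_tags]
    back = []
    for t in range(1, logits.size(0)):
        total = score.unsqueeze(1) + transitions + logits[t].unsqueeze(0)
        score, idx = total.max(dim=0)  # best previous tag for each current tag
        back.append(idx)
    best_score, last = score.max(dim=0)
    path = [last.item()]
    for idx in reversed(back):
        path.append(idx[path[-1]].item())
    return best_score.item(), path[::-1]
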
@@ -2,8 +2,11 @@ import torch

 __all__ = []

 class TimestepDropout(torch.nn.Dropout):
-    """This module accepts a ``[batch_size, num_timesteps, embedding_dim]`` input and uses a single
-    dropout mask of shape ``(batch_size, embedding_dim)`` to apply on every time step.
+    """
+    Alias: :class:`fastNLP.modules.TimestepDropout`
+
+    Accepts input shaped ``[batch_size, num_timesteps, embedding_dim]`` and applies dropout with a single mask
+    (shaped ``(batch_size, embedding_dim)``) shared across every timestep.
     """

     def forward(self, x):
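
# --- Editor's sketch (not part of the diff): one dropout mask per sequence ---
# The idea behind TimestepDropout: sample a [batch, 1, dim] Bernoulli mask
# once and broadcast it over all timesteps, instead of resampling per position.
import torch

def timestep_dropout(x, p=0.5, training=True):
    # x: [batch, num_timesteps, embedding_dim]
    if not training or p == 0:
        return x
    keep = torch.bernoulli(torch.full((x.size(0), 1, x.size(2)), 1 - p,
                                      device=x.device))
    return x * keep / (1 - p)  # inverted dropout scaling
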
@@ -7,6 +7,8 @@ from ..utils import initial_parameter
 # from torch.nn.init import xavier_uniform

 class ConvolutionCharEncoder(nn.Module):
     """
+    Alias: :class:`fastNLP.modules.encoder.ConvolutionCharEncoder` :class:`fastNLP.modules.encoder.char_encoder.ConvolutionCharEncoder`
+
     Char-level convolutional encoder.

     :param int char_emb_size: dimension of the char-level embedding. Default: 50
         E.g. with 26 characters, each embedded as a 50-dimensional vector, the input vector dimension is 50.
@@ -9,10 +9,12 @@ from ..utils import initial_parameter

 class ConvMaxpool(nn.Module):
-    """A layer combining convolution and max-pooling.
-    Given an input of batch_size x max_len x input_size, returns a matrix of batch_size x sum(output_channels). Internally, it first
-    convolves the input with a CNN, passes it through the activation, and then max-pools over the length (max_len) dimension,
-    yielding one vector per sample.
+    """
+    Alias: :class:`fastNLP.modules.encoder.ConvMaxpool` :class:`fastNLP.modules.encoder.conv_maxpool.ConvMaxpool`
+
+    A layer combining convolution and max-pooling. Given an input of batch_size x max_len x input_size, returns a matrix of
+    batch_size x sum(output_channels). Internally, it first convolves the input with a CNN, passes it through the activation,
+    and then max-pools over the length (max_len) dimension, yielding one vector per sample.

     :param int in_channels: size of the input channels, usually the embedding dimension or the encoder's output dimension
     :param int,tuple(int) out_channels: number of output channels. If a list, its length must match that of kernel_sizes.
@@ -2,7 +2,10 @@ import torch.nn as nn
 from ..utils import get_embeddings

 class Embedding(nn.Embedding):
-    """Embedding component. The vocabulary size is available via self.num_embeddings and the embedding dimension via self.embedding_dim."""
+    """
+    Alias: :class:`fastNLP.modules.Embedding` :class:`fastNLP.modules.encoder.embedding.Embedding`
+
+    Embedding component. The vocabulary size is available via self.num_embeddings and the embedding dimension via self.embedding_dim."""

     def __init__(self, init_embed, padding_idx=None, dropout=0.0, sparse=False, max_norm=None, norm_type=2,
                  scale_grad_by_freq=False):
@@ -9,7 +9,10 @@ from ..utils import initial_parameter

 class LSTM(nn.Module):
-    """LSTM module, a light wrapper around PyTorch's LSTM
+    """
+    Alias: :class:`fastNLP.modules.encoder.LSTM` :class:`fastNLP.modules.encoder.lstm.LSTM`
+
+    LSTM module, a light wrapper around PyTorch's LSTM.

     :param input_size: feature dimension of the input `x`
     :param hidden_size: feature dimension of the hidden state `h`
@@ -8,6 +8,9 @@ import numpy as NP

 class StarTransformer(nn.Module):
     """
+    Alias: :class:`fastNLP.modules.encoder.StarTransformer` :class:`fastNLP.modules.encoder.star_transformer.StarTransformer`
+
     Encoder part of the Star-Transformer. Takes 3-d text input and returns an encoding of the same length.

     paper: https://arxiv.org/abs/1902.09113
@@ -5,7 +5,11 @@ from ..dropout import TimestepDropout

 class TransformerEncoder(nn.Module):
-    """The transformer encoder module, without the embedding layer
+    """
+    Alias: :class:`fastNLP.modules.encoder.TransformerEncoder` :class:`fastNLP.modules.encoder.transformer.TransformerEncoder`
+
+    The transformer encoder module, without the embedding layer.

     :param int num_layers: number of transformer layers
     :param int model_size: size of the input dimension, which is also the size of the output dimension.
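
# --- Editor's sketch (not part of the diff): one post-norm encoder layer ---
# Self-attention plus position-wise feed-forward with residual connections,
# the building block the docstring refers to. Norm placement and the use of
# nn.MultiheadAttention (batch_first needs PyTorch >= 1.9) are assumptions,
# not this module's exact choices.
import torch.nn as nn

class EncoderLayer(nn.Module):
    def __init__(self, model_size, num_heads, inner_size):
        super().__init__()
        self.attn = nn.MultiheadAttention(model_size, num_heads, batch_first=True)
        self.ff = nn.Sequential(nn.Linear(model_size, inner_size), nn.ReLU(),
                                nn.Linear(inner_size, model_size))
        self.norm1 = nn.LayerNorm(model_size)
        self.norm2 = nn.LayerNorm(model_size)

    def forward(self, x, key_padding_mask=None):  # x: [batch, seq_len, model_size]
        a, _ = self.attn(x, x, x, key_padding_mask=key_padding_mask)
        x = self.norm1(x + a)
        return self.norm2(x + self.ff(x))
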
@@ -197,7 +197,10 @@ class VarRNNBase(nn.Module):
         return output, hidden

 class VarLSTM(VarRNNBase):
-    """Variational Dropout LSTM.
+    """
+    Alias: :class:`fastNLP.modules.encoder.VarLSTM` :class:`fastNLP.modules.encoder.variational_rnn.VarLSTM`
+
+    Variational Dropout LSTM.

     :param input_size: feature dimension of the input `x`
     :param hidden_size: feature dimension of the hidden state `h`
@@ -218,7 +221,10 @@ class VarLSTM(VarRNNBase):

 class VarRNN(VarRNNBase):
-    """Variational Dropout RNN.
+    """
+    Alias: :class:`fastNLP.modules.encoder.VarRNN` :class:`fastNLP.modules.encoder.variational_rnn.VarRNN`
+
+    Variational Dropout RNN.

     :param input_size: feature dimension of the input `x`
     :param hidden_size: feature dimension of the hidden state `h`
@@ -238,7 +244,10 @@ class VarRNN(VarRNNBase):
         return super(VarRNN, self).forward(x, hx)

 class VarGRU(VarRNNBase):
-    """Variational Dropout GRU.
+    """
+    Alias: :class:`fastNLP.modules.encoder.VarGRU` :class:`fastNLP.modules.encoder.variational_rnn.VarGRU`
+
+    Variational Dropout GRU.

     :param input_size: feature dimension of the input `x`
     :param hidden_size: feature dimension of the hidden state `h`
@@ -257,35 +266,3 @@ class VarGRU(VarRNNBase):

     def forward(self, x, hx=None):
         return super(VarGRU, self).forward(x, hx)
-
-# if __name__ == '__main__':
-#     x = torch.Tensor([[1,2,3], [4,5,0], [6,0,0]])[:,:,None] * 0.1
-#     mask = (x != 0).float().view(3, -1)
-#     seq_lens = torch.LongTensor([3,2,1])
-#     y = torch.Tensor([[0,1,1], [1,1,0], [0,0,0]])
-#     # rev = _reverse_packed_sequence(pack)
-#     # # print(rev)
-#     lstm = VarLSTM(input_size=1, num_layers=2, hidden_size=2,
-#                    batch_first=True, bidirectional=True,
-#                    input_dropout=0.0, hidden_dropout=0.0,)
-#     # lstm = nn.LSTM(input_size=1, num_layers=2, hidden_size=2,
-#     #                batch_first=True, bidirectional=True,)
-#     loss = nn.BCELoss()
-#     m = nn.Sigmoid()
-#     optim = torch.optim.SGD(lstm.parameters(), lr=1e-3)
-#     for i in range(2000):
-#         optim.zero_grad()
-#         pack = pack_padded_sequence(x, seq_lens, batch_first=True)
-#         out, hidden = lstm(pack)
-#         out, lens = pad_packed_sequence(out, batch_first=True)
-#         # print(lens)
-#         # print(out)
-#         # print(hidden[0])
-#         # print(hidden[0].size())
-#         # print(hidden[1])
-#         out = out.sum(-1)
-#         out = m(out) * mask
-#         l = loss(out, y)
-#         l.backward()
-#         optim.step()
-#         if i % 50 == 0:
-#             print(out)
@@ -70,10 +70,7 @@ def initial_parameter(net, initial_method=None):

 def get_embeddings(init_embed):
     """
-    Get word embeddings.
-
-    .. todo::
-        Fill in the documentation.
+    Builds an nn.Embedding object from the given init_embed.

     :param init_embed: the word embedding; may be a tuple (num_embeddings, embedding_dim), i.e.
         the embedding size and the dimension of each word. An nn.Embedding object may also be passed,
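
# --- Editor's sketch (not part of the diff): a get_embeddings-style helper ---
# One plausible reading of the docstring above: accept a (num_embeddings,
# embedding_dim) tuple, an existing nn.Embedding, or a numpy array of
# pretrained weights. Simplified relative to the real function.
import numpy as np
import torch
import torch.nn as nn

def make_embedding(init_embed):
    if isinstance(init_embed, tuple):
        return nn.Embedding(*init_embed)  # randomly initialized
    if isinstance(init_embed, nn.Embedding):
        return init_embed  # use as given
    if isinstance(init_embed, np.ndarray):
        weight = torch.from_numpy(init_embed).float()
        return nn.Embedding.from_pretrained(weight, freeze=False)
    raise TypeError(f'unsupported init_embed type: {type(init_embed)}')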