diff --git a/fastNLP/models/biaffine_parser.py b/fastNLP/models/biaffine_parser.py
index 3a5607f5..100bfb72 100644
--- a/fastNLP/models/biaffine_parser.py
+++ b/fastNLP/models/biaffine_parser.py
@@ -226,7 +226,10 @@ class LabelBilinear(nn.Module):
         return output
 
 
 class BiaffineParser(GraphParser):
-    """Biaffine Dependency Parser 实现.
+    """
+    别名::class:`fastNLP.models.BiaffineParser` :class:`fastNLP.models.biaffine_parser.BiaffineParser`
+
+    Biaffine Dependency Parser 实现.
     论文参考 ` Deep Biaffine Attention for Neural Dependency Parsing
     (Dozat and Manning, 2016) `_ .
@@ -456,6 +459,8 @@ class BiaffineParser(GraphParser):
 
 class ParserLoss(LossFunc):
     """
+    别名::class:`fastNLP.models.ParserLoss` :class:`fastNLP.models.biaffine_parser.ParserLoss`
+
     计算parser的loss
 
     :param pred1: [batch_size, seq_len, seq_len] 边预测logits
@@ -478,6 +483,8 @@ class ParserLoss(LossFunc):
 
 class ParserMetric(MetricBase):
     """
+    别名::class:`fastNLP.models.ParserMetric` :class:`fastNLP.models.biaffine_parser.ParserMetric`
+
     评估parser的性能
 
     :param pred1: 边预测logits
diff --git a/fastNLP/models/cnn_text_classification.py b/fastNLP/models/cnn_text_classification.py
index 5df4e62a..eb829601 100644
--- a/fastNLP/models/cnn_text_classification.py
+++ b/fastNLP/models/cnn_text_classification.py
@@ -10,6 +10,8 @@ from ..modules import encoder
 
 
 class CNNText(torch.nn.Module):
     """
+    别名::class:`fastNLP.models.CNNText` :class:`fastNLP.models.cnn_text_classification.CNNText`
+
     使用CNN进行文本分类的模型
     'Yoon Kim. 2014. Convolution Neural Networks for Sentence Classification.'
diff --git a/fastNLP/models/sequence_labeling.py b/fastNLP/models/sequence_labeling.py
index 98badd56..6cfbf28d 100644
--- a/fastNLP/models/sequence_labeling.py
+++ b/fastNLP/models/sequence_labeling.py
@@ -10,6 +10,8 @@ from torch import nn
 
 
 class SeqLabeling(BaseModel):
     """
+    别名::class:`fastNLP.models.SeqLabeling` :class:`fastNLP.models.sequence_labeling.SeqLabeling`
+
     一个基础的Sequence labeling的模型。
     用于做sequence labeling的基础类。结构包含一层Embedding,一层LSTM(单向,一层),一层FC,以及一层CRF。
@@ -100,6 +102,8 @@ class SeqLabeling(BaseModel):
 
 class AdvSeqLabel(nn.Module):
     """
+    别名::class:`fastNLP.models.AdvSeqLabel` :class:`fastNLP.models.sequence_labeling.AdvSeqLabel`
+
     更复杂的Sequence Labelling模型。结构为Embedding, LayerNorm, 双向LSTM(两层),FC,LayerNorm,DropOut,FC,CRF。
 
     :param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray init_embed: Embedding的大小(传入tuple(int, int),
diff --git a/fastNLP/models/snli.py b/fastNLP/models/snli.py
index ac0a2e47..34b54302 100644
--- a/fastNLP/models/snli.py
+++ b/fastNLP/models/snli.py
@@ -14,8 +14,9 @@ my_inf = 10e12
 
 class ESIM(BaseModel):
     """
+    别名::class:`fastNLP.models.ESIM` :class:`fastNLP.models.snli.ESIM`
+
     ESIM模型的一个PyTorch实现。
-
     ESIM模型的论文: Enhanced LSTM for Natural Language Inference (arXiv: 1609.06038)
 
     :param int vocab_size: 词表大小
@@ -49,7 +50,7 @@
         )
 
         self.bi_attention = Aggregator.BiAttention()
-        self.mean_pooling = Aggregator.MeanPoolWithMask()
+        self.mean_pooling = Aggregator.AvgPoolWithMask()
         self.max_pooling = Aggregator.MaxPoolWithMask()
 
         self.inference_layer = nn.Linear(self.hidden_size * 4, self.hidden_size)
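The 别名 (alias) lines added in these docstrings document a short import path alongside the defining module for each model. A quick sanity check of that claim, not part of the patch itself (the re-export from fastNLP.models is assumed from the docstrings):

    # Hypothetical check: the documented alias and the full module path resolve to the same class.
    from fastNLP.models import CNNText
    from fastNLP.models.cnn_text_classification import CNNText as CNNTextFullPath

    assert CNNText is CNNTextFullPath
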
diff --git a/fastNLP/models/star_transformer.py b/fastNLP/models/star_transformer.py
index f7b9028e..cdd1f321 100644
--- a/fastNLP/models/star_transformer.py
+++ b/fastNLP/models/star_transformer.py
@@ -11,6 +11,8 @@ from torch import nn
 
 class StarTransEnc(nn.Module):
     """
+    别名::class:`fastNLP.models.StarTransEnc` :class:`fastNLP.models.star_transformer.StarTransEnc`
+
     带word embedding的Star-Transformer Encoder
 
     :param init_embed: 单词词典, 可以是 tuple, 包括(num_embedings, embedding_dim), 即
@@ -93,7 +95,10 @@ class _NLICls(nn.Module):
         return h
 
 class STSeqLabel(nn.Module):
-    """用于序列标注的Star-Transformer模型
+    """
+    别名::class:`fastNLP.models.STSeqLabel` :class:`fastNLP.models.star_transformer.STSeqLabel`
+
+    用于序列标注的Star-Transformer模型
 
     :param vocab_size: 词嵌入的词典大小
     :param emb_dim: 每个词嵌入的特征维度
@@ -153,7 +158,10 @@ class STSeqLabel(nn.Module):
 
 
 class STSeqCls(nn.Module):
-    """用于分类任务的Star-Transformer
+    """
+    别名::class:`fastNLP.models.STSeqCls` :class:`fastNLP.models.star_transformer.STSeqCls`
+
+    用于分类任务的Star-Transformer
 
     :param vocab_size: 词嵌入的词典大小
     :param emb_dim: 每个词嵌入的特征维度
@@ -214,7 +222,10 @@ class STSeqCls(nn.Module):
 
 
 class STNLICls(nn.Module):
-    """用于自然语言推断(NLI)的Star-Transformer
+    """
+    别名::class:`fastNLP.models.STNLICls` :class:`fastNLP.models.star_transformer.STNLICls`
+
+    用于自然语言推断(NLI)的Star-Transformer
 
     :param vocab_size: 词嵌入的词典大小
     :param emb_dim: 每个词嵌入的特征维度
diff --git a/fastNLP/modules/aggregator/__init__.py b/fastNLP/modules/aggregator/__init__.py
index 4a76cf5b..725ccd4b 100644
--- a/fastNLP/modules/aggregator/__init__.py
+++ b/fastNLP/modules/aggregator/__init__.py
@@ -1,7 +1,7 @@
 from .pooling import MaxPool
 from .pooling import MaxPoolWithMask
 from .pooling import AvgPool
-from .pooling import MeanPoolWithMask
+from .pooling import AvgPoolWithMask
 
 from .attention import MultiHeadAttention, BiAttention
 
 __all__ = [
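MeanPoolWithMask is renamed to AvgPoolWithMask here, so downstream code importing the old name has to follow the rename. A hedged usage sketch, relying on the shapes documented for the class later in this patch:

    # Not part of the diff: masked average pooling over the length dimension.
    import torch
    from fastNLP.modules.aggregator import AvgPoolWithMask

    pool = AvgPoolWithMask()
    x = torch.randn(2, 5, 8)                    # [batch_size, max_len, hidden_size]
    mask = torch.tensor([[1, 1, 1, 0, 0],
                         [1, 1, 1, 1, 1]])      # 1 marks real tokens, 0 marks padding
    out = pool(x, mask)                         # documented output: [batch_size, hidden_size]
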
diff --git a/fastNLP/modules/aggregator/pooling.py b/fastNLP/modules/aggregator/pooling.py
index 47050355..5d83ef68 100644
--- a/fastNLP/modules/aggregator/pooling.py
+++ b/fastNLP/modules/aggregator/pooling.py
@@ -5,6 +5,8 @@ import torch.nn as nn
 
 class MaxPool(nn.Module):
     """
+    别名::class:`fastNLP.modules.aggregator.MaxPool` :class:`fastNLP.modules.aggregator.pooling.MaxPool`
+
     Max-pooling模块。
 
     :param stride: 窗口移动大小,默认为kernel_size
@@ -12,11 +14,9 @@
     :param dilation: 控制窗口内元素移动距离的大小
     :param dimension: MaxPool的维度,支持1,2,3维。
     :param kernel_size: max pooling的窗口大小,默认为tensor最后k维,其中k为dimension
-    :param return_indices:
     :param ceil_mode:
     """
-    def __init__(self, stride=None, padding=0, dilation=1, dimension=1, kernel_size=None,
-                 return_indices=False, ceil_mode=False):
+    def __init__(self, stride=None, padding=0, dilation=1, dimension=1, kernel_size=None, ceil_mode=False):
         super(MaxPool, self).__init__()
         assert (1 <= dimension) and (dimension <= 3)
@@ -25,7 +25,6 @@ class MaxPool(nn.Module):
         self.padding = padding
         self.dilation = dilation
         self.kernel_size = kernel_size
-        self.return_indices = return_indices
         self.ceil_mode = ceil_mode
 
     def forward(self, x):
@@ -33,27 +32,31 @@ class MaxPool(nn.Module):
             pooling = nn.MaxPool1d(
                 stride=self.stride, padding=self.padding, dilation=self.dilation,
                 kernel_size=self.kernel_size if self.kernel_size is not None else x.size(-1),
-                return_indices=self.return_indices, ceil_mode=self.ceil_mode
+                return_indices=False, ceil_mode=self.ceil_mode
             )
             x = torch.transpose(x, 1, 2)  # [N,L,C] -> [N,C,L]
         elif self.dimension == 2:
             pooling = nn.MaxPool2d(
                 stride=self.stride, padding=self.padding, dilation=self.dilation,
                 kernel_size=self.kernel_size if self.kernel_size is not None else (x.size(-2), x.size(-1)),
-                return_indices=self.return_indices, ceil_mode=self.ceil_mode
+                return_indices=False, ceil_mode=self.ceil_mode
             )
         else:
             pooling = nn.MaxPool2d(
                 stride=self.stride, padding=self.padding, dilation=self.dilation,
                 kernel_size=self.kernel_size if self.kernel_size is not None else (x.size(-3), x.size(-2), x.size(-1)),
-                return_indices=self.return_indices, ceil_mode=self.ceil_mode
+                return_indices=False, ceil_mode=self.ceil_mode
             )
         x = pooling(x)
         return x.squeeze(dim=-1)  # [N,C,1] -> [N,C]
 
 
 class MaxPoolWithMask(nn.Module):
-    """带mask矩阵的1维max pooling"""
+    """
+    别名::class:`fastNLP.modules.aggregator.MaxPoolWithMask` :class:`fastNLP.modules.aggregator.pooling.MaxPoolWithMask`
+
+    带mask矩阵的max pooling。在做max-pooling的时候不会考虑mask值为0的位置。
+    """
 
     def __init__(self):
         super(MaxPoolWithMask, self).__init__()
@@ -89,7 +92,11 @@ class KMaxPool(nn.Module):
 
 
 class AvgPool(nn.Module):
-    """1-d average pooling module."""
+    """
+    别名::class:`fastNLP.modules.aggregator.AvgPool` :class:`fastNLP.modules.aggregator.pooling.AvgPool`
+
+    给定形如[batch_size, max_len, hidden_size]的输入,在最后一维进行avg pooling. 输出为[batch_size, hidden_size]
+    """
 
     def __init__(self, stride=None, padding=0):
         super(AvgPool, self).__init__()
@@ -111,10 +118,16 @@ class AvgPool(nn.Module):
         return x.squeeze(dim=-1)
 
 
-class MeanPoolWithMask(nn.Module):
+class AvgPoolWithMask(nn.Module):
+    """
+    别名::class:`fastNLP.modules.aggregator.AvgPoolWithMask` :class:`fastNLP.modules.aggregator.pooling.AvgPoolWithMask`
+
+    给定形如[batch_size, max_len, hidden_size]的输入,在最后一维进行avg pooling. 输出为[batch_size, hidden_size], pooling
+    的时候只会考虑mask为1的位置
+    """
 
     def __init__(self):
-        super(MeanPoolWithMask, self).__init__()
+        super(AvgPoolWithMask, self).__init__()
         self.inf = 10e12
 
     def forward(self, tensor, mask, dim=1):
diff --git a/fastNLP/modules/decoder/CRF.py b/fastNLP/modules/decoder/CRF.py
index 2c9080b2..275f955c 100644
--- a/fastNLP/modules/decoder/CRF.py
+++ b/fastNLP/modules/decoder/CRF.py
@@ -6,6 +6,8 @@ from ..utils import initial_parameter
 
 def allowed_transitions(id2target, encoding_type='bio', include_start_end=True):
     """
+    别名::class:`fastNLP.modules.decoder.allowed_transitions` :class:`fastNLP.modules.decoder.CRF.allowed_transitions`
+
     给定一个id到label的映射表,返回所有可以跳转的(from_tag_id, to_tag_id)列表。
 
     :param dict id2target: key是label的indices,value是str类型的tag或tag-label。value可以是只有tag的, 比如"B", "M"; 也可以是
@@ -133,7 +135,10 @@ def _is_transition_allowed(encoding_type, from_tag, from_label, to_tag, to_label
 
 
 class ConditionalRandomField(nn.Module):
-    """条件随机场。
+    """
+    别名::class:`fastNLP.modules.decoder.ConditionalRandomField` :class:`fastNLP.modules.decoder.CRF.ConditionalRandomField`
+
+    条件随机场。
     提供forward()以及viterbi_decode()两个方法,分别用于训练与inference。
 
     :param int num_tags: 标签的数量
diff --git a/fastNLP/modules/decoder/MLP.py b/fastNLP/modules/decoder/MLP.py
index 35484932..71d899b0 100644
--- a/fastNLP/modules/decoder/MLP.py
+++ b/fastNLP/modules/decoder/MLP.py
@@ -5,7 +5,10 @@ from ..utils import initial_parameter
 
 
 class MLP(nn.Module):
-    """Multilayer Perceptrons as a decoder
+    """
+    别名::class:`fastNLP.modules.MLP` :class:`fastNLP.modules.decoder.MLP.MLP`
+
+    多层感知器
 
     :param list size_layer: 一个int的列表,用来定义MLP的层数,列表中的数字为每一层是hidden数目。MLP的层数为 len(size_layer) - 1
     :param str or list activation:
diff --git a/fastNLP/modules/decoder/utils.py b/fastNLP/modules/decoder/utils.py
index 95b25767..1e7a4258 100644
--- a/fastNLP/modules/decoder/utils.py
+++ b/fastNLP/modules/decoder/utils.py
@@ -3,7 +3,10 @@ import torch
 
 
 def viterbi_decode(logits, transitions, mask=None, unpad=False):
-    """给定一个特征矩阵以及转移分数矩阵,计算出最佳的路径以及对应的分数
+    """
+    别名::class:`fastNLP.modules.decoder.viterbi_decode` :class:`fastNLP.modules.decoder.utils.viterbi_decode`
+
+    给定一个特征矩阵以及转移分数矩阵,计算出最佳的路径以及对应的分数
 
     :param torch.FloatTensor logits: batch_size x max_len x num_tags,特征矩阵。
     :param torch.FloatTensor transitions: n_tags x n_tags。[i, j]位置的值认为是从tag i到tag j的转换。
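The viterbi_decode hunk above documents the expected tensor shapes. A hedged call sketch using those shapes; the structure of the return value is not shown in this hunk and is an assumption:

    import torch
    from fastNLP.modules.decoder.utils import viterbi_decode

    batch_size, max_len, n_tags = 2, 6, 5
    logits = torch.randn(batch_size, max_len, n_tags)    # batch_size x max_len x num_tags
    transitions = torch.randn(n_tags, n_tags)            # [i, j]: score of moving from tag i to tag j
    mask = torch.ones(batch_size, max_len)

    best = viterbi_decode(logits, transitions, mask=mask, unpad=True)  # assumed: best tag paths (and their scores)
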
diff --git a/fastNLP/modules/dropout.py b/fastNLP/modules/dropout.py
index 97745c00..34b426fd 100644
--- a/fastNLP/modules/dropout.py
+++ b/fastNLP/modules/dropout.py
@@ -2,8 +2,11 @@ import torch
 
 __all__ = []
 
 
 class TimestepDropout(torch.nn.Dropout):
-    """This module accepts a ``[batch_size, num_timesteps, embedding_dim)]`` and use a single
-    dropout mask of shape ``(batch_size, embedding_dim)`` to apply on every time step.
+    """
+    别名::class:`fastNLP.modules.TimestepDropout`
+
+    接受的参数shape为``[batch_size, num_timesteps, embedding_dim]`` 使用同一个mask(shape为``(batch_size, embedding_dim)``)
+    在每个timestep上做dropout。
     """
 
     def forward(self, x):
diff --git a/fastNLP/modules/encoder/char_encoder.py b/fastNLP/modules/encoder/char_encoder.py
index c3886c86..be04a6be 100644
--- a/fastNLP/modules/encoder/char_encoder.py
+++ b/fastNLP/modules/encoder/char_encoder.py
@@ -7,6 +7,8 @@ from ..utils import initial_parameter
 # from torch.nn.init import xavier_uniform
 
 class ConvolutionCharEncoder(nn.Module):
     """
+    别名::class:`fastNLP.modules.encoder.ConvolutionCharEncoder` :class:`fastNLP.modules.encoder.char_encoder.ConvolutionCharEncoder`
+
     char级别的卷积编码器.
 
     :param int char_emb_size: char级别embedding的维度. Default: 50
         例: 有26个字符, 每一个的embedding是一个50维的向量, 所以输入的向量维度为50.
diff --git a/fastNLP/modules/encoder/conv_maxpool.py b/fastNLP/modules/encoder/conv_maxpool.py
index d01eddea..6b4c39ed 100644
--- a/fastNLP/modules/encoder/conv_maxpool.py
+++ b/fastNLP/modules/encoder/conv_maxpool.py
@@ -9,10 +9,12 @@ from ..utils import initial_parameter
 
 
 class ConvMaxpool(nn.Module):
-    """集合了Convolution和Max-Pooling于一体的层。
-    给定一个batch_size x max_len x input_size的输入,返回batch_size x sum(output_channels) 大小的matrix。在内部,是先使用
-    CNN给输入做卷积,然后经过activation激活层,在通过在长度(max_len)这一维进行max_pooling。最后得到每个sample的一个vector
-    表示。
+    """
+    别名::class:`fastNLP.modules.encoder.ConvMaxpool` :class:`fastNLP.modules.encoder.conv_maxpool.ConvMaxpool`
+
+    集合了Convolution和Max-Pooling于一体的层。给定一个batch_size x max_len x input_size的输入,返回batch_size x
+    sum(output_channels) 大小的matrix。在内部,是先使用CNN给输入做卷积,然后经过activation激活层,再通过在长度(max_len)
+    这一维进行max_pooling。最后得到每个sample的一个向量表示。
 
     :param int in_channels: 输入channel的大小,一般是embedding的维度; 或encoder的output维度
     :param int,tuple(int) out_channels: 输出channel的数量。如果为list,则需要与kernel_sizes的数量保持一致
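A hedged shape sketch for ConvMaxpool as documented above; only the parameter names come from the docstring, so the exact constructor signature is an assumption:

    import torch
    from fastNLP.modules.encoder.conv_maxpool import ConvMaxpool

    conv_pool = ConvMaxpool(in_channels=50, out_channels=[30, 40, 50], kernel_sizes=[1, 3, 5])
    x = torch.randn(4, 20, 50)    # batch_size x max_len x input_size
    out = conv_pool(x)            # documented result: batch_size x sum(output_channels), i.e. [4, 120]
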
diff --git a/fastNLP/modules/encoder/embedding.py b/fastNLP/modules/encoder/embedding.py
index 8cc53b0b..c402f318 100644
--- a/fastNLP/modules/encoder/embedding.py
+++ b/fastNLP/modules/encoder/embedding.py
@@ -2,7 +2,10 @@ import torch.nn as nn
 from ..utils import get_embeddings
 
 
 class Embedding(nn.Embedding):
-    """Embedding组件. 可以通过self.num_embeddings获取词表大小; self.embedding_dim获取embedding的维度"""
+    """
+    别名::class:`fastNLP.modules.Embedding` :class:`fastNLP.modules.encoder.embedding.Embedding`
+
+    Embedding组件. 可以通过self.num_embeddings获取词表大小; self.embedding_dim获取embedding的维度"""
 
     def __init__(self, init_embed, padding_idx=None, dropout=0.0, sparse=False, max_norm=None, norm_type=2,
                  scale_grad_by_freq=False):
diff --git a/fastNLP/modules/encoder/lstm.py b/fastNLP/modules/encoder/lstm.py
index b11a84eb..ada34c26 100644
--- a/fastNLP/modules/encoder/lstm.py
+++ b/fastNLP/modules/encoder/lstm.py
@@ -9,7 +9,10 @@ from ..utils import initial_parameter
 
 
 class LSTM(nn.Module):
-    """LSTM 模块, 轻量封装的Pytorch LSTM
+    """
+    别名::class:`fastNLP.modules.encoder.LSTM` :class:`fastNLP.modules.encoder.lstm.LSTM`
+
+    LSTM 模块, 轻量封装的Pytorch LSTM
 
     :param input_size: 输入 `x` 的特征维度
     :param hidden_size: 隐状态 `h` 的特征维度
diff --git a/fastNLP/modules/encoder/star_transformer.py b/fastNLP/modules/encoder/star_transformer.py
index 42662804..e721c16f 100644
--- a/fastNLP/modules/encoder/star_transformer.py
+++ b/fastNLP/modules/encoder/star_transformer.py
@@ -8,6 +8,9 @@ import numpy as NP
 
 class StarTransformer(nn.Module):
     """
+    别名::class:`fastNLP.modules.encoder.StarTransformer` :class:`fastNLP.modules.encoder.star_transformer.StarTransformer`
+
+
     Star-Transformer 的encoder部分。 输入3d的文本输入, 返回相同长度的文本编码
 
     paper: https://arxiv.org/abs/1902.09113
diff --git a/fastNLP/modules/encoder/transformer.py b/fastNLP/modules/encoder/transformer.py
index 60216c2b..9647f86e 100644
--- a/fastNLP/modules/encoder/transformer.py
+++ b/fastNLP/modules/encoder/transformer.py
@@ -5,7 +5,11 @@ from ..dropout import TimestepDropout
 
 
 class TransformerEncoder(nn.Module):
-    """transformer的encoder模块,不包含embedding层
+    """
+    别名::class:`fastNLP.modules.encoder.TransformerEncoder` :class:`fastNLP.modules.encoder.transformer.TransformerEncoder`
+
+
+    transformer的encoder模块,不包含embedding层
 
     :param int num_layers: transformer的层数
     :param int model_size: 输入维度的大小。同时也是输出维度的大小。
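TransformerEncoder above imports TimestepDropout, whose contract (from the dropout.py hunk earlier in this patch) is to sample one dropout mask per example and reuse it at every timestep. A plain-PyTorch sketch of that idea, not fastNLP's implementation:

    import torch

    def timestep_shared_dropout(x, p=0.5, training=True):
        # x: [batch_size, num_timesteps, embedding_dim]
        if not training or p == 0:
            return x
        # one [batch_size, 1, embedding_dim] mask, broadcast over the timestep dimension
        mask = x.new_empty(x.size(0), 1, x.size(2)).bernoulli_(1 - p) / (1 - p)
        return x * mask
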
diff --git a/fastNLP/modules/encoder/variational_rnn.py b/fastNLP/modules/encoder/variational_rnn.py
index 5a2e99f3..b3858020 100644
--- a/fastNLP/modules/encoder/variational_rnn.py
+++ b/fastNLP/modules/encoder/variational_rnn.py
@@ -197,7 +197,10 @@ class VarRNNBase(nn.Module):
         return output, hidden
 
 
 class VarLSTM(VarRNNBase):
-    """Variational Dropout LSTM.
+    """
+    别名::class:`fastNLP.modules.encoder.VarLSTM` :class:`fastNLP.modules.encoder.variational_rnn.VarLSTM`
+
+    Variational Dropout LSTM.
 
     :param input_size: 输入 `x` 的特征维度
     :param hidden_size: 隐状态 `h` 的特征维度
@@ -218,7 +221,10 @@ class VarLSTM(VarRNNBase):
 
 
 class VarRNN(VarRNNBase):
-    """Variational Dropout RNN.
+    """
+    别名::class:`fastNLP.modules.encoder.VarRNN` :class:`fastNLP.modules.encoder.variational_rnn.VarRNN`
+
+    Variational Dropout RNN.
 
     :param input_size: 输入 `x` 的特征维度
     :param hidden_size: 隐状态 `h` 的特征维度
@@ -238,7 +244,10 @@ class VarRNN(VarRNNBase):
     def forward(self, x, hx=None):
         return super(VarRNN, self).forward(x, hx)
 
 
 class VarGRU(VarRNNBase):
-    """Variational Dropout GRU.
+    """
+    别名::class:`fastNLP.modules.encoder.VarGRU` :class:`fastNLP.modules.encoder.variational_rnn.VarGRU`
+
+    Variational Dropout GRU.
 
     :param input_size: 输入 `x` 的特征维度
     :param hidden_size: 隐状态 `h` 的特征维度
@@ -257,35 +266,3 @@ class VarGRU(VarRNNBase):
     def forward(self, x, hx=None):
         return super(VarGRU, self).forward(x, hx)
 
-# if __name__ == '__main__':
-#     x = torch.Tensor([[1,2,3], [4,5,0], [6,0,0]])[:,:,None] * 0.1
-#     mask = (x != 0).float().view(3, -1)
-#     seq_lens = torch.LongTensor([3,2,1])
-#     y = torch.Tensor([[0,1,1], [1,1,0], [0,0,0]])
-#     # rev = _reverse_packed_sequence(pack)
-#     # # print(rev)
-#     lstm = VarLSTM(input_size=1, num_layers=2, hidden_size=2,
-#                    batch_first=True, bidirectional=True,
-#                    input_dropout=0.0, hidden_dropout=0.0,)
-#     # lstm = nn.LSTM(input_size=1, num_layers=2, hidden_size=2,
-#     #                batch_first=True, bidirectional=True,)
-#     loss = nn.BCELoss()
-#     m = nn.Sigmoid()
-#     optim = torch.optim.SGD(lstm.parameters(), lr=1e-3)
-#     for i in range(2000):
-#         optim.zero_grad()
-#         pack = pack_padded_sequence(x, seq_lens, batch_first=True)
-#         out, hidden = lstm(pack)
-#         out, lens = pad_packed_sequence(out, batch_first=True)
-#         # print(lens)
-#         # print(out)
-#         # print(hidden[0])
-#         # print(hidden[0].size())
-#         # print(hidden[1])
-#         out = out.sum(-1)
-#         out = m(out) * mask
-#         l = loss(out, y)
-#         l.backward()
-#         optim.step()
-#         if i % 50 == 0:
-#             print(out)
diff --git a/fastNLP/modules/utils.py b/fastNLP/modules/utils.py
index 047ebb78..3dfe1969 100644
--- a/fastNLP/modules/utils.py
+++ b/fastNLP/modules/utils.py
@@ -70,10 +70,7 @@ def initial_parameter(net, initial_method=None):
 
 def get_embeddings(init_embed):
     """
-    得到词嵌入
-
-    .. todo::
-        补上文档
+    根据输入的init_embed生成nn.Embedding对象。
 
     :param init_embed: 单词词典, 可以是 tuple, 包括(num_embedings, embedding_dim), 即
         embedding的大小和每个词的维度. 也可以传入 nn.Embedding 对象,
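The rewritten get_embeddings docstring says the helper builds an nn.Embedding from whatever init_embed it is given. A hedged usage sketch based only on the two input forms named above; the pass-through of an existing nn.Embedding is an assumption:

    import torch.nn as nn
    from fastNLP.modules.utils import get_embeddings

    emb = get_embeddings((1000, 50))                 # tuple -> nn.Embedding with 1000 words of dim 50
    assert (emb.num_embeddings, emb.embedding_dim) == (1000, 50)

    pre_built = nn.Embedding(1000, 50)
    emb_again = get_embeddings(pre_built)            # assumed: an existing nn.Embedding is used as-is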