@@ -226,7 +226,10 @@ class LabelBilinear(nn.Module):
         return output

 class BiaffineParser(GraphParser):
-    """Implementation of the Biaffine Dependency Parser.
+    """
+    Alias: :class:`fastNLP.models.BiaffineParser` :class:`fastNLP.models.baffine_parser.BiaffineParser`
+
+    Implementation of the Biaffine Dependency Parser.
     Reference: `Deep Biaffine Attention for Neural Dependency Parsing (Dozat and Manning, 2016)
     <https://arxiv.org/abs/1611.01734>`_ .
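For review convenience, the arc-scoring rule this class implements can be sketched in plain PyTorch. This is an illustrative sketch of biaffine attention from the cited paper; the tensor names and sizes below are made up, not the class's actual attributes:

    import torch

    # Biaffine arc scoring (Dozat & Manning, 2016), sketched.
    # h_dep, h_head: [batch, seq_len, d] outputs of the dependent/head MLPs.
    batch, seq_len, d = 2, 5, 8
    h_dep = torch.randn(batch, seq_len, d)
    h_head = torch.randn(batch, seq_len, d)
    U = torch.randn(d, d)   # bilinear weight
    b = torch.randn(d)      # linear term on the head representation

    # scores[n, i, j] = score of token j being the head of token i
    scores = torch.einsum('nid,de,nje->nij', h_dep, U, h_head)
    scores = scores + torch.einsum('njd,d->nj', h_head, b).unsqueeze(1)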
@@ -456,6 +459,8 @@ class BiaffineParser(GraphParser):
 class ParserLoss(LossFunc):
     """
+    Alias: :class:`fastNLP.models.ParserLoss` :class:`fastNLP.models.baffine_parser.ParserLoss`
+
     Computes the parser loss

     :param pred1: [batch_size, seq_len, seq_len] edge prediction logits
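Given the documented [batch_size, seq_len, seq_len] edge logits, the standard arc loss is a cross entropy over candidate heads. A minimal sketch with hypothetical tensors (not ParserLoss's actual internals):

    import torch
    import torch.nn.functional as F

    batch, seq_len = 2, 6
    pred1 = torch.randn(batch, seq_len, seq_len)             # edge logits
    gold_heads = torch.randint(0, seq_len, (batch, seq_len))

    # head selection for each token as a seq_len-way classification
    arc_loss = F.cross_entropy(pred1.view(-1, seq_len), gold_heads.view(-1))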
@@ -478,6 +483,8 @@ class ParserLoss(LossFunc):
 class ParserMetric(MetricBase):
     """
+    Alias: :class:`fastNLP.models.ParserMetric` :class:`fastNLP.models.baffine_parser.ParserMetric`
+
     Evaluates parser performance

     :param pred1: edge prediction logits
@@ -10,6 +10,8 @@ from ..modules import encoder
 class CNNText(torch.nn.Module):
     """
+    Alias: :class:`fastNLP.models.CNNText` :class:`fastNLP.modules.aggregator.cnn_text_classification.CNNText`
+
     A model for text classification with a CNN
     'Yoon Kim. 2014. Convolution Neural Networks for Sentence Classification.'
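The Kim (2014) recipe behind CNNText, sketched in plain PyTorch as a reading aid; kernel sizes, channel counts, and the class count here are illustrative assumptions:

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    emb = torch.randn(2, 20, 128)    # [batch, max_len, embed_dim]
    convs = nn.ModuleList([nn.Conv1d(128, 100, k) for k in (3, 4, 5)])
    # convolve over time, then max-over-time pooling per filter size
    feats = [F.relu(c(emb.transpose(1, 2))).max(dim=-1)[0] for c in convs]
    logits = nn.Linear(300, 5)(torch.cat(feats, dim=-1))    # 5 assumed classes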
@@ -10,6 +10,8 @@ from torch import nn
 class SeqLabeling(BaseModel):
     """
+    Alias: :class:`fastNLP.models.SeqLabeling` :class:`fastNLP.modules.aggregator.sequence_labeling.SeqLabeling`
+
     A basic sequence labeling model.
     Base class for sequence labeling tasks; the architecture is one Embedding layer, one LSTM (unidirectional, single layer), one FC layer, and a CRF layer.

@@ -100,6 +102,8 @@ class SeqLabeling(BaseModel):
 class AdvSeqLabel(nn.Module):
     """
+    Alias: :class:`fastNLP.models.AdvSeqLabel` :class:`fastNLP.modules.aggregator.sequence_labeling.AdvSeqLabel`
+
     A more elaborate sequence labelling model. The architecture is Embedding, LayerNorm, bidirectional LSTM (two layers), FC, LayerNorm, Dropout, FC, CRF.

     :param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray init_embed: size of the Embedding (when passing tuple(int, int),
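The SeqLabeling pipeline described above, sketched with plain modules (all sizes hypothetical); the CRF on top is shown separately in the decoder hunks below:

    import torch
    import torch.nn as nn

    emb = nn.Embedding(1000, 50)               # Embedding
    lstm = nn.LSTM(50, 100, batch_first=True)  # unidirectional, one layer
    fc = nn.Linear(100, 7)                     # FC to 7 assumed tags

    x = torch.randint(0, 1000, (2, 6))         # [batch, max_len]
    feats = fc(lstm(emb(x))[0])                # [2, 6, 7], fed to the CRF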
@@ -14,8 +14,9 @@ my_inf = 10e12
 class ESIM(BaseModel):
     """
+    Alias: :class:`fastNLP.models.ESIM` :class:`fastNLP.models.snli.ESIM`
+
     A PyTorch implementation of the ESIM model.
     ESIM paper: Enhanced LSTM for Natural Language Inference (arXiv: 1609.06038)

     :param int vocab_size: vocabulary size
@@ -49,7 +50,7 @@ class ESIM(BaseModel): | |||||
) | ) | ||||
self.bi_attention = Aggregator.BiAttention() | self.bi_attention = Aggregator.BiAttention() | ||||
self.mean_pooling = Aggregator.MeanPoolWithMask() | |||||
self.mean_pooling = Aggregator.AvgPoolWithMask() | |||||
self.max_pooling = Aggregator.MaxPoolWithMask() | self.max_pooling = Aggregator.MaxPoolWithMask() | ||||
self.inference_layer = nn.Linear(self.hidden_size * 4, self.hidden_size) | self.inference_layer = nn.Linear(self.hidden_size * 4, self.hidden_size) | ||||
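The rename does not change ESIM's composition step: a masked mean vector and a masked max vector are concatenated per sentence, and doing this for both premise and hypothesis is presumably what makes inference_layer's input hidden_size * 4. A hedged sketch with made-up tensors:

    import torch

    hidden = torch.randn(2, 7, 300)            # [batch, len, hidden_size]
    mask = torch.tensor([[1., 1., 1., 1., 1., 0., 0.],
                         [1., 1., 1., 0., 0., 0., 0.]])

    mean_vec = (hidden * mask.unsqueeze(-1)).sum(1) / mask.sum(1, keepdim=True)
    max_vec = hidden.masked_fill(mask.unsqueeze(-1).eq(0), -1e12).max(1)[0]
    sent = torch.cat([mean_vec, max_vec], dim=-1)   # [batch, hidden_size * 2]
    # premise and hypothesis together give the hidden_size * 4 input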
@@ -11,6 +11,8 @@ from torch import nn
 class StarTransEnc(nn.Module):
     """
+    Alias: :class:`fastNLP.models.StarTransEnc` :class:`fastNLP.models.start_transformer.StarTransEnc`
+
     Star-Transformer encoder with word embedding

     :param init_embed: the word embedding; may be a tuple of (num_embedings, embedding_dim), i.e.
@@ -93,7 +95,10 @@ class _NLICls(nn.Module):
         return h

 class STSeqLabel(nn.Module):
-    """Star-Transformer model for sequence labeling
+    """
+    Alias: :class:`fastNLP.models.STSeqLabel` :class:`fastNLP.models.start_transformer.STSeqLabel`
+
+    Star-Transformer model for sequence labeling

     :param vocab_size: vocabulary size of the word embedding
     :param emb_dim: feature dimension of each word embedding
@@ -153,7 +158,10 @@ class STSeqLabel(nn.Module):
 class STSeqCls(nn.Module):
-    """Star-Transformer for classification tasks
+    """
+    Alias: :class:`fastNLP.models.STSeqCls` :class:`fastNLP.models.start_transformer.STSeqCls`
+
+    Star-Transformer for classification tasks

     :param vocab_size: vocabulary size of the word embedding
     :param emb_dim: feature dimension of each word embedding
@@ -214,7 +222,10 @@ class STSeqCls(nn.Module):
 class STNLICls(nn.Module):
-    """Star-Transformer for natural language inference (NLI)
+    """
+    Alias: :class:`fastNLP.models.STNLICls` :class:`fastNLP.models.start_transformer.STNLICls`
+
+    Star-Transformer for natural language inference (NLI)

     :param vocab_size: vocabulary size of the word embedding
     :param emb_dim: feature dimension of each word embedding
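A hedged usage sketch for one of these wrappers, assuming the remaining constructor arguments have defaults and that the forward signature is (words, seq_len); both assumptions may differ in a given fastNLP version:

    import torch
    from fastNLP.models import STSeqCls

    model = STSeqCls(vocab_size=10000, emb_dim=100, num_cls=5)  # num_cls assumed
    words = torch.randint(0, 10000, (2, 16))
    seq_len = torch.full((2,), 16, dtype=torch.long)
    output = model(words, seq_len)   # fastNLP models typically return a dict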
@@ -1,7 +1,7 @@
 from .pooling import MaxPool
 from .pooling import MaxPoolWithMask
 from .pooling import AvgPool
-from .pooling import MeanPoolWithMask
+from .pooling import AvgPoolWithMask
 from .attention import MultiHeadAttention, BiAttention

 __all__ = [

@@ -5,6 +5,8 @@ import torch.nn as nn
 class MaxPool(nn.Module):
     """
+    Alias: :class:`fastNLP.modules.aggregator.MaxPool` :class:`fastNLP.modules.aggregator.pooling.MaxPool`
+
     Max-pooling module.

     :param stride: window step size; defaults to kernel_size
@@ -12,11 +14,9 @@ class MaxPool(nn.Module):
     :param dilation: controls the spacing between elements inside the window
     :param dimension: dimensionality of the MaxPool; 1, 2 and 3 are supported.
     :param kernel_size: size of the max pooling window; defaults to the last k dimensions of the tensor, where k is dimension
-    :param return_indices:
     :param ceil_mode:
     """

-    def __init__(self, stride=None, padding=0, dilation=1, dimension=1, kernel_size=None,
-                 return_indices=False, ceil_mode=False):
+    def __init__(self, stride=None, padding=0, dilation=1, dimension=1, kernel_size=None, ceil_mode=False):
         super(MaxPool, self).__init__()
         assert (1 <= dimension) and (dimension <= 3)

@@ -25,7 +25,6 @@ class MaxPool(nn.Module):
         self.padding = padding
         self.dilation = dilation
         self.kernel_size = kernel_size
-        self.return_indices = return_indices
         self.ceil_mode = ceil_mode

     def forward(self, x):

@@ -33,27 +32,31 @@ class MaxPool(nn.Module):
             pooling = nn.MaxPool1d(
                 stride=self.stride, padding=self.padding, dilation=self.dilation,
                 kernel_size=self.kernel_size if self.kernel_size is not None else x.size(-1),
-                return_indices=self.return_indices, ceil_mode=self.ceil_mode
+                return_indices=False, ceil_mode=self.ceil_mode
             )
             x = torch.transpose(x, 1, 2)  # [N,L,C] -> [N,C,L]
         elif self.dimension == 2:
             pooling = nn.MaxPool2d(
                 stride=self.stride, padding=self.padding, dilation=self.dilation,
                 kernel_size=self.kernel_size if self.kernel_size is not None else (x.size(-2), x.size(-1)),
-                return_indices=self.return_indices, ceil_mode=self.ceil_mode
+                return_indices=False, ceil_mode=self.ceil_mode
             )
         else:
             pooling = nn.MaxPool2d(
                 stride=self.stride, padding=self.padding, dilation=self.dilation,
                 kernel_size=self.kernel_size if self.kernel_size is not None else (x.size(-3), x.size(-2), x.size(-1)),
-                return_indices=self.return_indices, ceil_mode=self.ceil_mode
+                return_indices=False, ceil_mode=self.ceil_mode
             )
         x = pooling(x)
         return x.squeeze(dim=-1)  # [N,C,1] -> [N,C]

 class MaxPoolWithMask(nn.Module):
-    """1-d max pooling with a mask matrix"""
+    """
+    Alias: :class:`fastNLP.modules.aggregator.MaxPoolWithMask` :class:`fastNLP.modules.aggregator.pooling.MaxPoolWithMask`
+
+    Max pooling with a mask matrix. Positions where the mask is 0 are ignored during max-pooling.
+    """

     def __init__(self):
         super(MaxPoolWithMask, self).__init__()
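What the new MaxPoolWithMask docstring promises, written out directly as a sketch (made-up tensors, not the class's code):

    import torch

    tensor = torch.randn(2, 5, 4)      # [batch, max_len, hidden]
    mask = torch.tensor([[1, 1, 1, 0, 0],
                         [1, 1, 0, 0, 0]])

    # push masked positions toward -inf so they can never win the max
    masked = tensor.masked_fill(mask.unsqueeze(-1).eq(0), -1e12)
    pooled = masked.max(dim=1)[0]      # [batch, hidden]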
@@ -89,7 +92,11 @@ class KMaxPool(nn.Module):
 class AvgPool(nn.Module):
-    """1-d average pooling module."""
+    """
+    Alias: :class:`fastNLP.modules.aggregator.AvgPool` :class:`fastNLP.modules.aggregator.pooling.AvgPool`
+
+    Given input of shape [batch_size, max_len, hidden_size], applies avg pooling over the last dimension. Output is [batch_size, hidden_size]
+    """

     def __init__(self, stride=None, padding=0):
         super(AvgPool, self).__init__()

@@ -111,10 +118,16 @@ class AvgPool(nn.Module):
         return x.squeeze(dim=-1)

-class MeanPoolWithMask(nn.Module):
+class AvgPoolWithMask(nn.Module):
+    """
+    Alias: :class:`fastNLP.modules.aggregator.AvgPoolWithMask` :class:`fastNLP.modules.aggregator.pooling.AvgPoolWithMask`
+
+    Given input of shape [batch_size, max_len, hidden_size], applies avg pooling over the last dimension. Output is [batch_size, hidden_size]; only
+    positions where the mask is 1 are considered during pooling
+    """

     def __init__(self):
-        super(MeanPoolWithMask, self).__init__()
+        super(AvgPoolWithMask, self).__init__()
         self.inf = 10e12

     def forward(self, tensor, mask, dim=1):
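And the masked average that AvgPoolWithMask documents, in the same sketch style (made-up tensors):

    import torch

    tensor = torch.randn(2, 5, 4)      # [batch, max_len, hidden]
    mask = torch.tensor([[1., 1., 1., 0., 0.],
                         [1., 1., 0., 0., 0.]])

    # only mask == 1 positions contribute to the average
    summed = (tensor * mask.unsqueeze(-1)).sum(dim=1)
    pooled = summed / mask.sum(dim=1, keepdim=True)   # [batch, hidden]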
@@ -6,6 +6,8 @@ from ..utils import initial_parameter
 def allowed_transitions(id2target, encoding_type='bio', include_start_end=True):
     """
+    Alias: :class:`fastNLP.modules.decoder.allowed_transitions` :class:`fastNLP.modules.decoder.CRF.allowed_transitions`
+
     Given an id-to-label mapping, returns the list of all allowed (from_tag_id, to_tag_id) transitions.

     :param dict id2target: keys are label indices, values are str tags or tag-labels. A value may be a bare tag such as "B" or "M", or
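A quick usage sketch, following the signature shown in this hunk; the toy tag map is an assumption:

    from fastNLP.modules.decoder.CRF import allowed_transitions

    id2target = {0: 'B-PER', 1: 'I-PER', 2: 'O'}
    # list of (from_tag_id, to_tag_id) pairs the BIO scheme permits
    pairs = allowed_transitions(id2target, encoding_type='bio',
                                include_start_end=True)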
@@ -133,7 +135,10 @@ def _is_transition_allowed(encoding_type, from_tag, from_label, to_tag, to_label
 class ConditionalRandomField(nn.Module):
-    """Conditional random field.
+    """
+    Alias: :class:`fastNLP.modules.decoder.ConditionalRandomField` :class:`fastNLP.modules.decoder.CRF.ConditionalRandomField`
+
+    Conditional random field.
     Provides two methods, forward() and viterbi_decode(), used for training and inference respectively.

     :param int num_tags: number of tags
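The two documented entry points, sketched under the assumption that forward takes (feats, tags, mask) and that viterbi_decode returns (paths, scores); exact signatures may differ by version:

    import torch
    from fastNLP.modules.decoder.CRF import ConditionalRandomField

    crf = ConditionalRandomField(num_tags=5)
    feats = torch.randn(2, 6, 5)               # [batch, max_len, num_tags]
    tags = torch.randint(0, 5, (2, 6))
    mask = torch.ones(2, 6)

    nll = crf(feats, tags, mask)                      # training: forward()
    paths, scores = crf.viterbi_decode(feats, mask)   # inference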
@@ -5,7 +5,10 @@ from ..utils import initial_parameter
 class MLP(nn.Module):
-    """Multilayer Perceptrons as a decoder
+    """
+    Alias: :class:`fastNLP.modules.MLP` :class:`fastNLP.modules.decoder.MLP.MLP`
+
+    Multilayer perceptron

     :param list size_layer: a list of ints defining the MLP layers; each entry is the hidden size of that layer. The MLP has len(size_layer) - 1 layers
     :param str or list activation:
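The size_layer convention in a hedged one-liner: [100, 64, 10] yields len(size_layer) - 1 = 2 layers (100 -> 64 -> 10); the activation keyword is assumed to default to a string name:

    import torch
    from fastNLP.modules import MLP

    mlp = MLP([100, 64, 10], activation='relu')
    y = mlp(torch.randn(4, 100))   # -> [4, 10]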
@@ -3,7 +3,10 @@ import torch
 def viterbi_decode(logits, transitions, mask=None, unpad=False):
-    """Given a feature matrix and a transition score matrix, computes the best path and the corresponding score
+    """
+    Alias: :class:`fastNLP.modules.decoder.viterbi_decode` :class:`fastNLP.modules.decoder.utils.viterbi_decode`
+
+    Given a feature matrix and a transition score matrix, computes the best path and the corresponding score

     :param torch.FloatTensor logits: batch_size x max_len x num_tags, the feature matrix.
     :param torch.FloatTensor transitions: n_tags x n_tags. The value at [i, j] is treated as the transition score from tag i to tag j.
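Hedged usage, following the documented shapes; the (paths, scores) return order is an assumption:

    import torch
    from fastNLP.modules.decoder.utils import viterbi_decode

    batch, max_len, n_tags = 2, 6, 5
    logits = torch.randn(batch, max_len, n_tags)
    transitions = torch.randn(n_tags, n_tags)  # [i, j]: score of tag i -> tag j
    paths, scores = viterbi_decode(logits, transitions)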
@@ -2,8 +2,11 @@ import torch
 __all__ = []

 class TimestepDropout(torch.nn.Dropout):
-    """This module accepts a ``[batch_size, num_timesteps, embedding_dim)]`` and use a single
-    dropout mask of shape ``(batch_size, embedding_dim)`` to apply on every time step.
+    """
+    Alias: :class:`fastNLP.modules.TimestepDropout`
+
+    Accepts input of shape ``[batch_size, num_timesteps, embedding_dim)]`` and applies dropout with a single mask
+    (of shape ``(batch_size, embedding_dim)``) at every timestep.
     """

     def forward(self, x):
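The mechanism the new docstring describes, written out directly (a sketch, not the class's code): one Bernoulli mask per (batch, feature) pair, shared across all timesteps:

    import torch

    x = torch.randn(2, 10, 8)      # [batch, num_timesteps, embedding_dim]
    p = 0.5
    mask = torch.bernoulli(torch.full((2, 1, 8), 1 - p)) / (1 - p)
    y = x * mask                   # identical mask at every timestep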
@@ -7,6 +7,8 @@ from ..utils import initial_parameter
 # from torch.nn.init import xavier_uniform
 class ConvolutionCharEncoder(nn.Module):
     """
+    Alias: :class:`fastNLP.modules.encoder.ConvolutionCharEncoder` :class:`fastNLP.modules.encoder.char_encoder.ConvolutionCharEncoder`
+
     Char-level convolutional encoder.

     :param int char_emb_size: dimension of the char-level embedding. Default: 50
         E.g. with 26 characters, each embedded as a 50-dimensional vector, the input vector dimension is 50.

@@ -9,10 +9,12 @@ from ..utils import initial_parameter
 class ConvMaxpool(nn.Module):
-    """A layer combining convolution and max-pooling.
-    Given input of shape batch_size x max_len x input_size, returns a matrix of size batch_size x sum(output_channels). Internally, the
-    input is first convolved by a CNN, passed through an activation layer, and then max-pooled over the length (max_len)
-    dimension, yielding one vector per sample.
+    """
+    Alias: :class:`fastNLP.modules.encoder.ConvMaxpool` :class:`fastNLP.modules.encoder.conv_maxpool.ConvMaxpool`
+
+    A layer combining convolution and max-pooling. Given input of shape batch_size x max_len x input_size, returns a matrix of
+    size batch_size x sum(output_channels). Internally, the input is first convolved by a CNN, passed through an activation
+    layer, and then max-pooled over the length (max_len) dimension, yielding one vector representation per sample.

     :param int in_channels: number of input channels, usually the embedding dimension, or the encoder output dimension
     :param int,tuple(int) out_channels: number of output channels. If a list, its length must match the number of kernel_sizes
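The reworded ConvMaxpool description, as a plain-PyTorch sketch (one assumed kernel size and channel count):

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    x = torch.randn(2, 20, 128)    # [batch, max_len, input_size]
    conv = nn.Conv1d(in_channels=128, out_channels=64, kernel_size=3, padding=1)
    feat = F.relu(conv(x.transpose(1, 2)))   # CNN + activation: [batch, 64, max_len]
    out = feat.max(dim=-1)[0]                # max over max_len: [batch, 64]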
@@ -2,7 +2,10 @@ import torch.nn as nn
 from ..utils import get_embeddings

 class Embedding(nn.Embedding):
-    """Embedding component. The vocabulary size is available as self.num_embeddings and the embedding dimension as self.embedding_dim"""
+    """
+    Alias: :class:`fastNLP.modules.Embedding` :class:`fastNLP.modules.encoder.embedding.Embedding`
+
+    Embedding component. The vocabulary size is available as self.num_embeddings and the embedding dimension as self.embedding_dim"""

     def __init__(self, init_embed, padding_idx=None, dropout=0.0, sparse=False, max_norm=None, norm_type=2,
                  scale_grad_by_freq=False):

@@ -9,7 +9,10 @@ from ..utils import initial_parameter
 class LSTM(nn.Module):
-    """LSTM module, a light wrapper around the PyTorch LSTM
+    """
+    Alias: :class:`fastNLP.modules.encoder.LSTM` :class:`fastNLP.modules.encoder.lstm.LSTM`
+
+    LSTM module, a light wrapper around the PyTorch LSTM

     :param input_size: feature dimension of the input `x`
     :param hidden_size: feature dimension of the hidden state `h`

@@ -8,6 +8,9 @@ import numpy as NP
 class StarTransformer(nn.Module):
     """
+    Alias: :class:`fastNLP.modules.encoder.StarTransformer` :class:`fastNLP.modules.encoder.star_transformer.StarTransformer`
+
     The encoder part of the Star-Transformer. Takes 3-d text input and returns a text encoding of the same length
     paper: https://arxiv.org/abs/1902.09113

@@ -5,7 +5,11 @@ from ..dropout import TimestepDropout
 class TransformerEncoder(nn.Module):
-    """The transformer encoder module, without the embedding layer
+    """
+    Alias: :class:`fastNLP.modules.encoder.TransformerEncoder` :class:`fastNLP.modules.encoder.transformer.TransformerEncoder`
+
+    The transformer encoder module, without the embedding layer

     :param int num_layers: number of transformer layers
     :param int model_size: the input dimension; also the output dimension.

@@ -197,7 +197,10 @@ class VarRNNBase(nn.Module):
         return output, hidden

 class VarLSTM(VarRNNBase):
-    """Variational Dropout LSTM.
+    """
+    Alias: :class:`fastNLP.modules.encoder.VarLSTM` :class:`fastNLP.modules.encoder.variational_rnn.VarLSTM`
+
+    Variational Dropout LSTM.

     :param input_size: feature dimension of the input `x`
     :param hidden_size: feature dimension of the hidden state `h`

@@ -218,7 +221,10 @@ class VarLSTM(VarRNNBase):
 class VarRNN(VarRNNBase):
-    """Variational Dropout RNN.
+    """
+    Alias: :class:`fastNLP.modules.encoder.VarRNN` :class:`fastNLP.modules.encoder.variational_rnn.VarRNN`
+
+    Variational Dropout RNN.

     :param input_size: feature dimension of the input `x`
     :param hidden_size: feature dimension of the hidden state `h`

@@ -238,7 +244,10 @@ class VarRNN(VarRNNBase):
         return super(VarRNN, self).forward(x, hx)

 class VarGRU(VarRNNBase):
-    """Variational Dropout GRU.
+    """
+    Alias: :class:`fastNLP.modules.encoder.VarGRU` :class:`fastNLP.modules.encoder.variational_rnn.VarGRU`
+
+    Variational Dropout GRU.

     :param input_size: feature dimension of the input `x`
     :param hidden_size: feature dimension of the hidden state `h`
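A usage sketch for the variational-dropout RNNs; the constructor arguments are taken from the smoke-test code deleted in the next hunk, with dropout values changed for illustration:

    import torch
    from fastNLP.modules.encoder.variational_rnn import VarLSTM

    lstm = VarLSTM(input_size=1, num_layers=2, hidden_size=2,
                   batch_first=True, bidirectional=True,
                   input_dropout=0.3, hidden_dropout=0.3)
    out, hidden = lstm(torch.randn(3, 5, 1))   # [batch, len, input_size]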
@@ -257,35 +266,3 @@ class VarGRU(VarRNNBase):
     def forward(self, x, hx=None):
         return super(VarGRU, self).forward(x, hx)
-
-# if __name__ == '__main__':
-#     x = torch.Tensor([[1,2,3], [4,5,0], [6,0,0]])[:,:,None] * 0.1
-#     mask = (x != 0).float().view(3, -1)
-#     seq_lens = torch.LongTensor([3,2,1])
-#     y = torch.Tensor([[0,1,1], [1,1,0], [0,0,0]])
-#     # rev = _reverse_packed_sequence(pack)
-#     # # print(rev)
-#     lstm = VarLSTM(input_size=1, num_layers=2, hidden_size=2,
-#                    batch_first=True, bidirectional=True,
-#                    input_dropout=0.0, hidden_dropout=0.0,)
-#     # lstm = nn.LSTM(input_size=1, num_layers=2, hidden_size=2,
-#     #                batch_first=True, bidirectional=True,)
-#     loss = nn.BCELoss()
-#     m = nn.Sigmoid()
-#     optim = torch.optim.SGD(lstm.parameters(), lr=1e-3)
-#     for i in range(2000):
-#         optim.zero_grad()
-#         pack = pack_padded_sequence(x, seq_lens, batch_first=True)
-#         out, hidden = lstm(pack)
-#         out, lens = pad_packed_sequence(out, batch_first=True)
-#         # print(lens)
-#         # print(out)
-#         # print(hidden[0])
-#         # print(hidden[0].size())
-#         # print(hidden[1])
-#         out = out.sum(-1)
-#         out = m(out) * mask
-#         l = loss(out, y)
-#         l.backward()
-#         optim.step()
-#         if i % 50 == 0:
-#             print(out)
@@ -70,10 +70,7 @@ def initial_parameter(net, initial_method=None):
 def get_embeddings(init_embed):
     """
-    Get the word embedding
-
-    .. todo::
-        fill in the documentation
+    Builds an nn.Embedding object from the given init_embed.

     :param init_embed: the word embedding; may be a tuple of (num_embedings, embedding_dim), i.e.
         the size of the embedding and the dimension of each word. An nn.Embedding object may also be passed,
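A hedged sketch of the dispatch get_embeddings performs, based on the accepted types documented here and in the init_embed parameter docs above; the ndarray case is an assumption carried over from those docs:

    import numpy as np
    import torch.nn as nn
    from fastNLP.modules.utils import get_embeddings

    emb1 = get_embeddings((10000, 100))   # (num_embeddings, embedding_dim) tuple
    emb2 = get_embeddings(nn.Embedding(10000, 100))   # passed through
    emb3 = get_embeddings(np.random.rand(10000, 100).astype('float32'))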