
Add aliases

tags/v0.4.10
yh_cc 5 years ago
commit 6aaef9175c
19 changed files with 112 additions and 69 deletions
  1. fastNLP/models/biaffine_parser.py (+8 -1)
  2. fastNLP/models/cnn_text_classification.py (+2 -0)
  3. fastNLP/models/sequence_labeling.py (+4 -0)
  4. fastNLP/models/snli.py (+3 -2)
  5. fastNLP/models/star_transformer.py (+14 -3)
  6. fastNLP/modules/aggregator/__init__.py (+1 -1)
  7. fastNLP/modules/aggregator/pooling.py (+24 -11)
  8. fastNLP/modules/decoder/CRF.py (+6 -1)
  9. fastNLP/modules/decoder/MLP.py (+4 -1)
  10. fastNLP/modules/decoder/utils.py (+4 -1)
  11. fastNLP/modules/dropout.py (+5 -2)
  12. fastNLP/modules/encoder/char_encoder.py (+2 -0)
  13. fastNLP/modules/encoder/conv_maxpool.py (+6 -4)
  14. fastNLP/modules/encoder/embedding.py (+4 -1)
  15. fastNLP/modules/encoder/lstm.py (+4 -1)
  16. fastNLP/modules/encoder/star_transformer.py (+3 -0)
  17. fastNLP/modules/encoder/transformer.py (+5 -1)
  18. fastNLP/modules/encoder/variational_rnn.py (+12 -35)
  19. fastNLP/modules/utils.py (+1 -4)

fastNLP/models/biaffine_parser.py (+8 -1)

@@ -226,7 +226,10 @@ class LabelBilinear(nn.Module):
return output

class BiaffineParser(GraphParser):
"""Biaffine Dependency Parser 实现.
"""
Alias: :class:`fastNLP.models.BiaffineParser` :class:`fastNLP.models.biaffine_parser.BiaffineParser`

Implementation of the Biaffine Dependency Parser.
Reference paper: `Deep Biaffine Attention for Neural Dependency Parsing (Dozat and Manning, 2016)
<https://arxiv.org/abs/1611.01734>`_ .

@@ -456,6 +459,8 @@ class BiaffineParser(GraphParser):

class ParserLoss(LossFunc):
"""
Alias: :class:`fastNLP.models.ParserLoss` :class:`fastNLP.models.biaffine_parser.ParserLoss`

Computes the parser loss

:param pred1: [batch_size, seq_len, seq_len] arc prediction logits
@@ -478,6 +483,8 @@ class ParserLoss(LossFunc):

class ParserMetric(MetricBase):
"""
Alias: :class:`fastNLP.models.ParserMetric` :class:`fastNLP.models.biaffine_parser.ParserMetric`

Evaluates parser performance

:param pred1: arc prediction logits


fastNLP/models/cnn_text_classification.py (+2 -0)

@@ -10,6 +10,8 @@ from ..modules import encoder

class CNNText(torch.nn.Module):
"""
Alias: :class:`fastNLP.models.CNNText` :class:`fastNLP.models.cnn_text_classification.CNNText`

A model that uses a CNN for text classification
'Yoon Kim. 2014. Convolutional Neural Networks for Sentence Classification.'


fastNLP/models/sequence_labeling.py (+4 -0)

@@ -10,6 +10,8 @@ from torch import nn

class SeqLabeling(BaseModel):
"""
Alias: :class:`fastNLP.models.SeqLabeling` :class:`fastNLP.models.sequence_labeling.SeqLabeling`

A basic sequence labeling model.
A base class for sequence labeling tasks. The architecture consists of an Embedding layer, a single-layer unidirectional LSTM, an FC layer and a CRF layer.
@@ -100,6 +102,8 @@ class SeqLabeling(BaseModel):

class AdvSeqLabel(nn.Module):
"""
Alias: :class:`fastNLP.models.AdvSeqLabel` :class:`fastNLP.models.sequence_labeling.AdvSeqLabel`

A more sophisticated sequence labeling model. The architecture is Embedding, LayerNorm, bidirectional LSTM (two layers), FC, LayerNorm, Dropout, FC, CRF.
:param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray init_embed: size of the Embedding (pass a tuple(int, int),


fastNLP/models/snli.py (+3 -2)

@@ -14,8 +14,9 @@ my_inf = 10e12

class ESIM(BaseModel):
"""
Alias: :class:`fastNLP.models.ESIM` :class:`fastNLP.models.snli.ESIM`

A PyTorch implementation of the ESIM model.
ESIM paper: Enhanced LSTM for Natural Language Inference (arXiv: 1609.06038)

:param int vocab_size: vocabulary size
@@ -49,7 +50,7 @@ class ESIM(BaseModel):
)

self.bi_attention = Aggregator.BiAttention()
self.mean_pooling = Aggregator.MeanPoolWithMask()
self.mean_pooling = Aggregator.AvgPoolWithMask()
self.max_pooling = Aggregator.MaxPoolWithMask()

self.inference_layer = nn.Linear(self.hidden_size * 4, self.hidden_size)


fastNLP/models/star_transformer.py (+14 -3)

@@ -11,6 +11,8 @@ from torch import nn

class StarTransEnc(nn.Module):
"""
Alias: :class:`fastNLP.models.StarTransEnc` :class:`fastNLP.models.star_transformer.StarTransEnc`

Star-Transformer encoder with word embedding

:param init_embed: word embedding; may be a tuple (num_embeddings, embedding_dim), i.e.
@@ -93,7 +95,10 @@ class _NLICls(nn.Module):
return h

class STSeqLabel(nn.Module):
"""用于序列标注的Star-Transformer模型
"""
Alias: :class:`fastNLP.models.STSeqLabel` :class:`fastNLP.models.star_transformer.STSeqLabel`

Star-Transformer model for sequence labeling

:param vocab_size: vocabulary size for the word embedding
:param emb_dim: feature dimension of each word embedding
@@ -153,7 +158,10 @@ class STSeqLabel(nn.Module):


class STSeqCls(nn.Module):
"""用于分类任务的Star-Transformer
"""
Alias: :class:`fastNLP.models.STSeqCls` :class:`fastNLP.models.star_transformer.STSeqCls`

Star-Transformer for classification tasks

:param vocab_size: vocabulary size for the word embedding
:param emb_dim: feature dimension of each word embedding
@@ -214,7 +222,10 @@ class STSeqCls(nn.Module):


class STNLICls(nn.Module):
"""用于自然语言推断(NLI)的Star-Transformer
"""
Alias: :class:`fastNLP.models.STNLICls` :class:`fastNLP.models.star_transformer.STNLICls`
Star-Transformer for natural language inference (NLI)

:param vocab_size: vocabulary size for the word embedding
:param emb_dim: feature dimension of each word embedding


fastNLP/modules/aggregator/__init__.py (+1 -1)

@@ -1,7 +1,7 @@
from .pooling import MaxPool
from .pooling import MaxPoolWithMask
from .pooling import AvgPool
from .pooling import MeanPoolWithMask
from .pooling import AvgPoolWithMask

from .attention import MultiHeadAttention, BiAttention
__all__ = [


fastNLP/modules/aggregator/pooling.py (+24 -11)

@@ -5,6 +5,8 @@ import torch.nn as nn

class MaxPool(nn.Module):
"""
Alias: :class:`fastNLP.modules.aggregator.MaxPool` :class:`fastNLP.modules.aggregator.pooling.MaxPool`

Max-pooling module.
:param stride: step size of the sliding window; defaults to kernel_size
@@ -12,11 +14,9 @@ class MaxPool(nn.Module):
:param dilation: controls the spacing between elements within the window
:param dimension: dimensionality of the MaxPool; 1, 2 and 3 dimensions are supported.
:param kernel_size: window size of the max pooling; defaults to the last k dimensions of the tensor, where k is dimension
:param return_indices:
:param ceil_mode:
"""
def __init__(self, stride=None, padding=0, dilation=1, dimension=1, kernel_size=None,
return_indices=False, ceil_mode=False):
def __init__(self, stride=None, padding=0, dilation=1, dimension=1, kernel_size=None, ceil_mode=False):
super(MaxPool, self).__init__()
assert (1 <= dimension) and (dimension <= 3)
@@ -25,7 +25,6 @@ class MaxPool(nn.Module):
self.padding = padding
self.dilation = dilation
self.kernel_size = kernel_size
self.return_indices = return_indices
self.ceil_mode = ceil_mode
def forward(self, x):
@@ -33,27 +32,31 @@ class MaxPool(nn.Module):
pooling = nn.MaxPool1d(
stride=self.stride, padding=self.padding, dilation=self.dilation,
kernel_size=self.kernel_size if self.kernel_size is not None else x.size(-1),
return_indices=self.return_indices, ceil_mode=self.ceil_mode
return_indices=False, ceil_mode=self.ceil_mode
)
x = torch.transpose(x, 1, 2) # [N,L,C] -> [N,C,L]
elif self.dimension == 2:
pooling = nn.MaxPool2d(
stride=self.stride, padding=self.padding, dilation=self.dilation,
kernel_size=self.kernel_size if self.kernel_size is not None else (x.size(-2), x.size(-1)),
return_indices=self.return_indices, ceil_mode=self.ceil_mode
return_indices=False, ceil_mode=self.ceil_mode
)
else:
pooling = nn.MaxPool2d(
stride=self.stride, padding=self.padding, dilation=self.dilation,
kernel_size=self.kernel_size if self.kernel_size is not None else (x.size(-3), x.size(-2), x.size(-1)),
return_indices=self.return_indices, ceil_mode=self.ceil_mode
return_indices=False, ceil_mode=self.ceil_mode
)
x = pooling(x)
return x.squeeze(dim=-1) # [N,C,1] -> [N,C]


class MaxPoolWithMask(nn.Module):
"""带mask矩阵的1维max pooling"""
"""
Alias: :class:`fastNLP.modules.aggregator.MaxPoolWithMask` :class:`fastNLP.modules.aggregator.pooling.MaxPoolWithMask`

Max pooling with a mask matrix. Positions whose mask value is 0 are ignored during max-pooling.
"""
def __init__(self):
super(MaxPoolWithMask, self).__init__()
@@ -89,7 +92,11 @@ class KMaxPool(nn.Module):


class AvgPool(nn.Module):
"""1-d average pooling module."""
"""
Alias: :class:`fastNLP.modules.aggregator.AvgPool` :class:`fastNLP.modules.aggregator.pooling.AvgPool`

Given an input of shape [batch_size, max_len, hidden_size], performs avg pooling over the last dimension. The output is [batch_size, hidden_size]
"""
def __init__(self, stride=None, padding=0):
super(AvgPool, self).__init__()
@@ -111,10 +118,16 @@ class AvgPool(nn.Module):
return x.squeeze(dim=-1)


class MeanPoolWithMask(nn.Module):
class AvgPoolWithMask(nn.Module):
"""
Alias: :class:`fastNLP.modules.aggregator.AvgPoolWithMask` :class:`fastNLP.modules.aggregator.pooling.AvgPoolWithMask`

Given an input of shape [batch_size, max_len, hidden_size], performs avg pooling over the last dimension. The output is [batch_size, hidden_size]; during
pooling, only positions where the mask is 1 are considered
"""

def __init__(self):
super(MeanPoolWithMask, self).__init__()
super(AvgPoolWithMask, self).__init__()
self.inf = 10e12
def forward(self, tensor, mask, dim=1):
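For context on the renamed pooling class, a minimal usage sketch (not part of this commit), assuming the forward signature shown in the hunk above, i.e. forward(tensor, mask, dim=1); all shapes below are hypothetical:

    import torch
    from fastNLP.modules.aggregator import MaxPoolWithMask, AvgPoolWithMask

    # hypothetical sizes, purely for illustration
    batch_size, max_len, hidden_size = 2, 5, 8
    x = torch.randn(batch_size, max_len, hidden_size)
    mask = torch.tensor([[1, 1, 1, 0, 0],
                         [1, 1, 1, 1, 1]])  # 1 = real token, 0 = padding

    max_pooled = MaxPoolWithMask()(x, mask)  # expected shape: [batch_size, hidden_size]
    avg_pooled = AvgPoolWithMask()(x, mask)  # padded positions are excluded from the average

After this commit, MeanPoolWithMask is no longer exported from the aggregator package, so callers switch to AvgPoolWithMask as the ESIM hunk above does.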


fastNLP/modules/decoder/CRF.py (+6 -1)

@@ -6,6 +6,8 @@ from ..utils import initial_parameter

def allowed_transitions(id2target, encoding_type='bio', include_start_end=True):
"""
Alias: :class:`fastNLP.modules.decoder.allowed_transitions` :class:`fastNLP.modules.decoder.CRF.allowed_transitions`

Given a mapping from id to label, returns a list of all allowed (from_tag_id, to_tag_id) transitions.

:param dict id2target: keys are label indices, values are tags or tag-labels of type str. A value may be a bare tag, e.g. "B", "M"; it may also be
@@ -133,7 +135,10 @@ def _is_transition_allowed(encoding_type, from_tag, from_label, to_tag, to_label


class ConditionalRandomField(nn.Module):
"""条件随机场。
"""
Alias: :class:`fastNLP.modules.decoder.ConditionalRandomField` :class:`fastNLP.modules.decoder.CRF.ConditionalRandomField`

Conditional random field.
Provides two methods, forward() and viterbi_decode(), used for training and inference respectively.

:param int num_tags: number of tags
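As a quick illustration of allowed_transitions (not part of this commit), a sketch with a hypothetical BIO tag vocabulary, following the parameter description in the hunk above:

    from fastNLP.modules.decoder.CRF import allowed_transitions

    # hypothetical id-to-tag mapping, for illustration only
    id2target = {0: "O", 1: "B-PER", 2: "I-PER", 3: "B-LOC", 4: "I-LOC"}
    transitions = allowed_transitions(id2target, encoding_type="bio", include_start_end=True)
    # `transitions` lists every permitted (from_tag_id, to_tag_id) pair;
    # invalid jumps such as "O" -> "I-PER" are left out.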


fastNLP/modules/decoder/MLP.py (+4 -1)

@@ -5,7 +5,10 @@ from ..utils import initial_parameter


class MLP(nn.Module):
"""Multilayer Perceptrons as a decoder
"""
Alias: :class:`fastNLP.modules.MLP` :class:`fastNLP.modules.decoder.MLP.MLP`

Multilayer perceptron

:param list size_layer: a list of ints defining the MLP layers; each number is the hidden size of the corresponding layer. The number of MLP layers is len(size_layer) - 1
:param str or list activation:


fastNLP/modules/decoder/utils.py (+4 -1)

@@ -3,7 +3,10 @@ import torch


def viterbi_decode(logits, transitions, mask=None, unpad=False):
"""给定一个特征矩阵以及转移分数矩阵,计算出最佳的路径以及对应的分数
"""
Alias: :class:`fastNLP.modules.decoder.viterbi_decode` :class:`fastNLP.modules.decoder.utils.viterbi_decode`

Given a feature matrix and a transition score matrix, computes the best paths and their corresponding scores

:param torch.FloatTensor logits: batch_size x max_len x num_tags, the feature matrix.
:param torch.FloatTensor transitions: n_tags x n_tags. The value at position [i, j] is treated as the transition from tag i to tag j.
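A hedged usage sketch of viterbi_decode (not part of this commit); the sizes are hypothetical, and unpacking the result into paths and scores assumes the function returns exactly what the docstring above describes:

    import torch
    from fastNLP.modules.decoder.utils import viterbi_decode

    batch_size, max_len, n_tags = 2, 6, 5          # hypothetical sizes
    logits = torch.randn(batch_size, max_len, n_tags)
    transitions = torch.randn(n_tags, n_tags)      # [i, j]: score of moving from tag i to tag j
    mask = torch.ones(batch_size, max_len, dtype=torch.uint8)  # 1 = valid position

    # assumed return: best tag paths plus their scores, per the docstring
    paths, scores = viterbi_decode(logits, transitions, mask=mask, unpad=False)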


fastNLP/modules/dropout.py (+5 -2)

@@ -2,8 +2,11 @@ import torch
__all__ = []

class TimestepDropout(torch.nn.Dropout):
"""This module accepts a ``[batch_size, num_timesteps, embedding_dim)]`` and use a single
dropout mask of shape ``(batch_size, embedding_dim)`` to apply on every time step.
"""
Alias: :class:`fastNLP.modules.TimestepDropout`

Accepts input of shape ``[batch_size, num_timesteps, embedding_dim]`` and applies the same dropout mask (of shape ``(batch_size, embedding_dim)``)
at every timestep.
"""

def forward(self, x):
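To make the rewritten wording concrete, a small sketch (not part of this commit) of how TimestepDropout differs from ordinary dropout; the shapes are hypothetical:

    import torch
    from fastNLP.modules.dropout import TimestepDropout

    x = torch.randn(4, 10, 16)      # [batch_size, num_timesteps, embedding_dim]
    drop = TimestepDropout(p=0.3)
    drop.train()
    y = drop(x)                     # one [batch_size, embedding_dim] mask is drawn and reused on all 10 timesteps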


fastNLP/modules/encoder/char_encoder.py (+2 -0)

@@ -7,6 +7,8 @@ from ..utils import initial_parameter
# from torch.nn.init import xavier_uniform
class ConvolutionCharEncoder(nn.Module):
"""
Alias: :class:`fastNLP.modules.encoder.ConvolutionCharEncoder` :class:`fastNLP.modules.encoder.char_encoder.ConvolutionCharEncoder`

A char-level convolutional encoder.
:param int char_emb_size: dimension of the char-level embedding. Default: 50
Example: with 26 characters, each embedded as a 50-dimensional vector, the input vector dimension is 50.


fastNLP/modules/encoder/conv_maxpool.py (+6 -4)

@@ -9,10 +9,12 @@ from ..utils import initial_parameter


class ConvMaxpool(nn.Module):
"""集合了Convolution和Max-Pooling于一体的层。
给定一个batch_size x max_len x input_size的输入,返回batch_size x sum(output_channels) 大小的matrix。在内部,是先使用
CNN给输入做卷积,然后经过activation激活层,在通过在长度(max_len)这一维进行max_pooling。最后得到每个sample的一个vector
表示。
"""
Alias: :class:`fastNLP.modules.encoder.ConvMaxpool` :class:`fastNLP.modules.encoder.conv_maxpool.ConvMaxpool`

A layer that combines Convolution and Max-Pooling. Given an input of batch_size x max_len x input_size, it returns a matrix of size batch_size x
sum(output_channels). Internally, the input is first convolved by a CNN, then passed through an activation layer, and then max-pooled
over the length (max_len) dimension, yielding one vector representation per sample.

:param int in_channels: size of the input channels, usually the embedding dimension or the output dimension of an encoder
:param int,tuple(int) out_channels: number of output channels. If a list, its length must match the number of kernel_sizes
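A minimal sketch of ConvMaxpool (not part of this commit); the constructor arguments and shapes are assumptions based on the parameter list above, not a verified call signature:

    import torch
    from fastNLP.modules.encoder import ConvMaxpool

    # hypothetical configuration: three kernel widths, one output-channel count per width
    conv_pool = ConvMaxpool(in_channels=100, out_channels=[30, 40, 50], kernel_sizes=[1, 3, 5])
    x = torch.randn(8, 20, 100)     # [batch_size, max_len, in_channels]
    out = conv_pool(x)              # expected: [batch_size, 30 + 40 + 50] after conv, activation and max-over-time pooling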


fastNLP/modules/encoder/embedding.py (+4 -1)

@@ -2,7 +2,10 @@ import torch.nn as nn
from ..utils import get_embeddings

class Embedding(nn.Embedding):
"""Embedding组件. 可以通过self.num_embeddings获取词表大小; self.embedding_dim获取embedding的维度"""
"""
Alias: :class:`fastNLP.modules.Embedding` :class:`fastNLP.modules.encoder.embedding.Embedding`

Embedding component. The vocabulary size can be obtained via self.num_embeddings, and the embedding dimension via self.embedding_dim"""

def __init__(self, init_embed, padding_idx=None, dropout=0.0, sparse=False, max_norm=None, norm_type=2,
scale_grad_by_freq=False):


fastNLP/modules/encoder/lstm.py (+4 -1)

@@ -9,7 +9,10 @@ from ..utils import initial_parameter


class LSTM(nn.Module):
"""LSTM 模块, 轻量封装的Pytorch LSTM
"""
Alias: :class:`fastNLP.modules.encoder.LSTM` :class:`fastNLP.modules.encoder.lstm.LSTM`

LSTM module, a lightweight wrapper around the PyTorch LSTM

:param input_size: feature dimension of the input `x`
:param hidden_size: feature dimension of the hidden state `h`


fastNLP/modules/encoder/star_transformer.py (+3 -0)

@@ -8,6 +8,9 @@ import numpy as NP

class StarTransformer(nn.Module):
"""
Alias: :class:`fastNLP.modules.encoder.StarTransformer` :class:`fastNLP.modules.encoder.star_transformer.StarTransformer`


The encoder part of the Star-Transformer. Takes 3-d text input and returns a text encoding of the same length

paper: https://arxiv.org/abs/1902.09113


fastNLP/modules/encoder/transformer.py (+5 -1)

@@ -5,7 +5,11 @@ from ..dropout import TimestepDropout


class TransformerEncoder(nn.Module):
"""transformer的encoder模块,不包含embedding层
"""
Alias: :class:`fastNLP.modules.encoder.TransformerEncoder` :class:`fastNLP.modules.encoder.transformer.TransformerEncoder`


Transformer encoder module, without the embedding layer

:param int num_layers: number of transformer layers
:param int model_size: size of the input dimension, which is also the size of the output dimension.


fastNLP/modules/encoder/variational_rnn.py (+12 -35)

@@ -197,7 +197,10 @@ class VarRNNBase(nn.Module):
return output, hidden

class VarLSTM(VarRNNBase):
"""Variational Dropout LSTM.
"""
Alias: :class:`fastNLP.modules.encoder.VarLSTM` :class:`fastNLP.modules.encoder.variational_rnn.VarLSTM`

Variational Dropout LSTM.

:param input_size: feature dimension of the input `x`
:param hidden_size: feature dimension of the hidden state `h`
@@ -218,7 +221,10 @@ class VarLSTM(VarRNNBase):


class VarRNN(VarRNNBase):
"""Variational Dropout RNN.
"""
Alias: :class:`fastNLP.modules.encoder.VarRNN` :class:`fastNLP.modules.encoder.variational_rnn.VarRNN`

Variational Dropout RNN.

:param input_size: feature dimension of the input `x`
:param hidden_size: feature dimension of the hidden state `h`
@@ -238,7 +244,10 @@ class VarRNN(VarRNNBase):
return super(VarRNN, self).forward(x, hx)

class VarGRU(VarRNNBase):
"""Variational Dropout GRU.
"""
Alias: :class:`fastNLP.modules.encoder.VarGRU` :class:`fastNLP.modules.encoder.variational_rnn.VarGRU`

Variational Dropout GRU.

:param input_size: feature dimension of the input `x`
:param hidden_size: feature dimension of the hidden state `h`
@@ -257,35 +266,3 @@ class VarGRU(VarRNNBase):
def forward(self, x, hx=None):
return super(VarGRU, self).forward(x, hx)

# if __name__ == '__main__':
# x = torch.Tensor([[1,2,3], [4,5,0], [6,0,0]])[:,:,None] * 0.1
# mask = (x != 0).float().view(3, -1)
# seq_lens = torch.LongTensor([3,2,1])
# y = torch.Tensor([[0,1,1], [1,1,0], [0,0,0]])
# # rev = _reverse_packed_sequence(pack)
# # # print(rev)
# lstm = VarLSTM(input_size=1, num_layers=2, hidden_size=2,
# batch_first=True, bidirectional=True,
# input_dropout=0.0, hidden_dropout=0.0,)
# # lstm = nn.LSTM(input_size=1, num_layers=2, hidden_size=2,
# # batch_first=True, bidirectional=True,)
# loss = nn.BCELoss()
# m = nn.Sigmoid()
# optim = torch.optim.SGD(lstm.parameters(), lr=1e-3)
# for i in range(2000):
# optim.zero_grad()
# pack = pack_padded_sequence(x, seq_lens, batch_first=True)
# out, hidden = lstm(pack)
# out, lens = pad_packed_sequence(out, batch_first=True)
# # print(lens)
# # print(out)
# # print(hidden[0])
# # print(hidden[0].size())
# # print(hidden[1])
# out = out.sum(-1)
# out = m(out) * mask
# l = loss(out, y)
# l.backward()
# optim.step()
# if i % 50 == 0:
# print(out)

fastNLP/modules/utils.py (+1 -4)

@@ -70,10 +70,7 @@ def initial_parameter(net, initial_method=None):

def get_embeddings(init_embed):
"""
Get word embeddings
.. todo::
Fill in the documentation
Generates an nn.Embedding object from the given init_embed.

:param init_embed: word embedding; may be a tuple (num_embeddings, embedding_dim), i.e.
the embedding size and the dimension of each word. An nn.Embedding object may also be passed,
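To illustrate the rewritten get_embeddings docstring (not part of this commit), a sketch of the two input forms it describes:

    import torch.nn as nn
    from fastNLP.modules.utils import get_embeddings

    # from a (num_embeddings, embedding_dim) tuple: an embedding of that size
    embed_a = get_embeddings((1000, 50))

    # from an existing nn.Embedding object
    embed_b = get_embeddings(nn.Embedding(1000, 50))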

