@@ -226,7 +226,10 @@ class LabelBilinear(nn.Module):
         return output

 class BiaffineParser(GraphParser):
-    """Implementation of the Biaffine Dependency Parser.
+    """
+    Alias: :class:`fastNLP.models.BiaffineParser` :class:`fastNLP.models.baffine_parser.BiaffineParser`
+
+    Implementation of the Biaffine Dependency Parser.
     Reference: `Deep Biaffine Attention for Neural Dependency Parsing (Dozat and Manning, 2016)
     <https://arxiv.org/abs/1611.01734>`_ .
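For review convenience, the arc-scoring rule this class implements can be sketched in plain PyTorch. This is an illustrative sketch of biaffine attention from the cited paper; the tensor names and sizes below are made up, not the class's actual attributes:

    import torch

    # Biaffine arc scoring (Dozat & Manning, 2016), sketched.
    # h_dep, h_head: [batch, seq_len, d] outputs of the dependent/head MLPs.
    batch, seq_len, d = 2, 5, 8
    h_dep = torch.randn(batch, seq_len, d)
    h_head = torch.randn(batch, seq_len, d)
    U = torch.randn(d, d)   # bilinear weight
    b = torch.randn(d)      # linear term on the head representation

    # scores[n, i, j] = score of token j being the head of token i
    scores = torch.einsum('nid,de,nje->nij', h_dep, U, h_head)
    scores = scores + torch.einsum('njd,d->nj', h_head, b).unsqueeze(1)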
@@ -456,6 +459,8 @@ class BiaffineParser(GraphParser):
 class ParserLoss(LossFunc):
     """
+    Alias: :class:`fastNLP.models.ParserLoss` :class:`fastNLP.models.baffine_parser.ParserLoss`
+
     Computes the parser loss

     :param pred1: [batch_size, seq_len, seq_len] edge prediction logits
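Given the documented [batch_size, seq_len, seq_len] edge logits, the standard arc loss is a cross entropy over candidate heads. A minimal sketch with hypothetical tensors (not ParserLoss's actual internals):

    import torch
    import torch.nn.functional as F

    batch, seq_len = 2, 6
    pred1 = torch.randn(batch, seq_len, seq_len)             # edge logits
    gold_heads = torch.randint(0, seq_len, (batch, seq_len))

    # head selection for each token as a seq_len-way classification
    arc_loss = F.cross_entropy(pred1.view(-1, seq_len), gold_heads.view(-1))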
@@ -478,6 +483,8 @@ class ParserLoss(LossFunc):
 class ParserMetric(MetricBase):
     """
+    Alias: :class:`fastNLP.models.ParserMetric` :class:`fastNLP.models.baffine_parser.ParserMetric`
+
     Evaluates parser performance

     :param pred1: edge prediction logits
@@ -10,6 +10,8 @@ from ..modules import encoder
 class CNNText(torch.nn.Module):
     """
+    Alias: :class:`fastNLP.models.CNNText` :class:`fastNLP.modules.aggregator.cnn_text_classification.CNNText`
+
     A model for text classification with a CNN
     'Yoon Kim. 2014. Convolution Neural Networks for Sentence Classification.'
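The Kim (2014) recipe behind CNNText, sketched in plain PyTorch as a reading aid; kernel sizes, channel counts, and the class count here are illustrative assumptions:

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    emb = torch.randn(2, 20, 128)    # [batch, max_len, embed_dim]
    convs = nn.ModuleList([nn.Conv1d(128, 100, k) for k in (3, 4, 5)])
    # convolve over time, then max-over-time pooling per filter size
    feats = [F.relu(c(emb.transpose(1, 2))).max(dim=-1)[0] for c in convs]
    logits = nn.Linear(300, 5)(torch.cat(feats, dim=-1))    # 5 assumed classes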
@@ -10,6 +10,8 @@ from torch import nn
 class SeqLabeling(BaseModel):
     """
+    Alias: :class:`fastNLP.models.SeqLabeling` :class:`fastNLP.modules.aggregator.sequence_labeling.SeqLabeling`
+
     A basic sequence labeling model.
     Base class for sequence labeling tasks; the architecture is one Embedding layer, one LSTM (unidirectional, single layer), one FC layer, and a CRF layer.

@@ -100,6 +102,8 @@ class SeqLabeling(BaseModel):
 class AdvSeqLabel(nn.Module):
     """
+    Alias: :class:`fastNLP.models.AdvSeqLabel` :class:`fastNLP.modules.aggregator.sequence_labeling.AdvSeqLabel`
+
     A more elaborate sequence labelling model. The architecture is Embedding, LayerNorm, bidirectional LSTM (two layers), FC, LayerNorm, Dropout, FC, CRF.

     :param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray init_embed: size of the Embedding (when passing tuple(int, int),
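The SeqLabeling pipeline described above, sketched with plain modules (all sizes hypothetical); the CRF on top is shown separately in the decoder hunks below:

    import torch
    import torch.nn as nn

    emb = nn.Embedding(1000, 50)               # Embedding
    lstm = nn.LSTM(50, 100, batch_first=True)  # unidirectional, one layer
    fc = nn.Linear(100, 7)                     # FC to 7 assumed tags

    x = torch.randint(0, 1000, (2, 6))         # [batch, max_len]
    feats = fc(lstm(emb(x))[0])                # [2, 6, 7], fed to the CRF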
@@ -14,8 +14,9 @@ my_inf = 10e12
 class ESIM(BaseModel):
     """
+    Alias: :class:`fastNLP.models.ESIM` :class:`fastNLP.models.snli.ESIM`
+
     A PyTorch implementation of the ESIM model.
     ESIM paper: Enhanced LSTM for Natural Language Inference (arXiv: 1609.06038)

     :param int vocab_size: vocabulary size
@@ -49,7 +50,7 @@ class ESIM(BaseModel): | |||||
) | ) | ||||
self.bi_attention = Aggregator.BiAttention() | self.bi_attention = Aggregator.BiAttention() | ||||
self.mean_pooling = Aggregator.MeanPoolWithMask() | |||||
self.mean_pooling = Aggregator.AvgPoolWithMask() | |||||
self.max_pooling = Aggregator.MaxPoolWithMask() | self.max_pooling = Aggregator.MaxPoolWithMask() | ||||
self.inference_layer = nn.Linear(self.hidden_size * 4, self.hidden_size) | self.inference_layer = nn.Linear(self.hidden_size * 4, self.hidden_size) | ||||
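The rename does not change ESIM's composition step: a masked mean vector and a masked max vector are concatenated per sentence, and doing this for both premise and hypothesis is presumably what makes inference_layer's input hidden_size * 4. A hedged sketch with made-up tensors:

    import torch

    hidden = torch.randn(2, 7, 300)            # [batch, len, hidden_size]
    mask = torch.tensor([[1., 1., 1., 1., 1., 0., 0.],
                         [1., 1., 1., 0., 0., 0., 0.]])

    mean_vec = (hidden * mask.unsqueeze(-1)).sum(1) / mask.sum(1, keepdim=True)
    max_vec = hidden.masked_fill(mask.unsqueeze(-1).eq(0), -1e12).max(1)[0]
    sent = torch.cat([mean_vec, max_vec], dim=-1)   # [batch, hidden_size * 2]
    # premise and hypothesis together give the hidden_size * 4 input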
@@ -11,6 +11,8 @@ from torch import nn
 class StarTransEnc(nn.Module):
     """
+    Alias: :class:`fastNLP.models.StarTransEnc` :class:`fastNLP.models.start_transformer.StarTransEnc`
+
     Star-Transformer encoder with word embedding

     :param init_embed: the word embedding; may be a tuple of (num_embedings, embedding_dim), i.e.
@@ -93,7 +95,10 @@ class _NLICls(nn.Module):
         return h

 class STSeqLabel(nn.Module):
-    """Star-Transformer model for sequence labeling
+    """
+    Alias: :class:`fastNLP.models.STSeqLabel` :class:`fastNLP.models.start_transformer.STSeqLabel`
+
+    Star-Transformer model for sequence labeling

     :param vocab_size: vocabulary size of the word embedding
     :param emb_dim: feature dimension of each word embedding
@@ -153,7 +158,10 @@ class STSeqLabel(nn.Module):
 class STSeqCls(nn.Module):
-    """Star-Transformer for classification tasks
+    """
+    Alias: :class:`fastNLP.models.STSeqCls` :class:`fastNLP.models.start_transformer.STSeqCls`
+
+    Star-Transformer for classification tasks

     :param vocab_size: vocabulary size of the word embedding
     :param emb_dim: feature dimension of each word embedding
@@ -214,7 +222,10 @@ class STSeqCls(nn.Module):
 class STNLICls(nn.Module):
-    """Star-Transformer for natural language inference (NLI)
+    """
+    Alias: :class:`fastNLP.models.STNLICls` :class:`fastNLP.models.start_transformer.STNLICls`
+
+    Star-Transformer for natural language inference (NLI)

     :param vocab_size: vocabulary size of the word embedding
     :param emb_dim: feature dimension of each word embedding
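A hedged usage sketch for one of these wrappers, assuming the remaining constructor arguments have defaults and that the forward signature is (words, seq_len); both assumptions may differ in a given fastNLP version:

    import torch
    from fastNLP.models import STSeqCls

    model = STSeqCls(vocab_size=10000, emb_dim=100, num_cls=5)  # num_cls assumed
    words = torch.randint(0, 10000, (2, 16))
    seq_len = torch.full((2,), 16, dtype=torch.long)
    output = model(words, seq_len)   # fastNLP models typically return a dict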
@@ -1,7 +1,7 @@
 from .pooling import MaxPool
 from .pooling import MaxPoolWithMask
 from .pooling import AvgPool
-from .pooling import MeanPoolWithMask
+from .pooling import AvgPoolWithMask
 from .attention import MultiHeadAttention, BiAttention

 __all__ = [

@@ -5,6 +5,8 @@ import torch.nn as nn
 class MaxPool(nn.Module):
     """
+    Alias: :class:`fastNLP.modules.aggregator.MaxPool` :class:`fastNLP.modules.aggregator.pooling.MaxPool`
+
     Max-pooling module.

     :param stride: window step size; defaults to kernel_size
@@ -12,11 +14,9 @@ class MaxPool(nn.Module):
     :param dilation: controls the spacing between elements inside the window
     :param dimension: dimensionality of the MaxPool; 1, 2 and 3 are supported.
     :param kernel_size: size of the max pooling window; defaults to the last k dimensions of the tensor, where k is dimension
-    :param return_indices:
     :param ceil_mode:
     """

-    def __init__(self, stride=None, padding=0, dilation=1, dimension=1, kernel_size=None,
-                 return_indices=False, ceil_mode=False):
+    def __init__(self, stride=None, padding=0, dilation=1, dimension=1, kernel_size=None, ceil_mode=False):
         super(MaxPool, self).__init__()
         assert (1 <= dimension) and (dimension <= 3)

@@ -25,7 +25,6 @@ class MaxPool(nn.Module):
         self.padding = padding
         self.dilation = dilation
         self.kernel_size = kernel_size
-        self.return_indices = return_indices
         self.ceil_mode = ceil_mode

     def forward(self, x):

@@ -33,27 +32,31 @@ class MaxPool(nn.Module):
             pooling = nn.MaxPool1d(
                 stride=self.stride, padding=self.padding, dilation=self.dilation,
                 kernel_size=self.kernel_size if self.kernel_size is not None else x.size(-1),
-                return_indices=self.return_indices, ceil_mode=self.ceil_mode
+                return_indices=False, ceil_mode=self.ceil_mode
             )
             x = torch.transpose(x, 1, 2)  # [N,L,C] -> [N,C,L]
         elif self.dimension == 2:
             pooling = nn.MaxPool2d(
                 stride=self.stride, padding=self.padding, dilation=self.dilation,
                 kernel_size=self.kernel_size if self.kernel_size is not None else (x.size(-2), x.size(-1)),
-                return_indices=self.return_indices, ceil_mode=self.ceil_mode
+                return_indices=False, ceil_mode=self.ceil_mode
             )
         else:
             pooling = nn.MaxPool2d(
                 stride=self.stride, padding=self.padding, dilation=self.dilation,
                 kernel_size=self.kernel_size if self.kernel_size is not None else (x.size(-3), x.size(-2), x.size(-1)),
-                return_indices=self.return_indices, ceil_mode=self.ceil_mode
+                return_indices=False, ceil_mode=self.ceil_mode
             )
         x = pooling(x)
         return x.squeeze(dim=-1)  # [N,C,1] -> [N,C]

 class MaxPoolWithMask(nn.Module):
-    """1-d max pooling with a mask matrix"""
+    """
+    Alias: :class:`fastNLP.modules.aggregator.MaxPoolWithMask` :class:`fastNLP.modules.aggregator.pooling.MaxPoolWithMask`
+
+    Max pooling with a mask matrix. Positions where the mask is 0 are ignored during max-pooling.
+    """

     def __init__(self):
         super(MaxPoolWithMask, self).__init__()
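What the new MaxPoolWithMask docstring promises, written out directly as a sketch (made-up tensors, not the class's code):

    import torch

    tensor = torch.randn(2, 5, 4)      # [batch, max_len, hidden]
    mask = torch.tensor([[1, 1, 1, 0, 0],
                         [1, 1, 0, 0, 0]])

    # push masked positions toward -inf so they can never win the max
    masked = tensor.masked_fill(mask.unsqueeze(-1).eq(0), -1e12)
    pooled = masked.max(dim=1)[0]      # [batch, hidden]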
@@ -89,7 +92,11 @@ class KMaxPool(nn.Module):
 class AvgPool(nn.Module):
-    """1-d average pooling module."""
+    """
+    Alias: :class:`fastNLP.modules.aggregator.AvgPool` :class:`fastNLP.modules.aggregator.pooling.AvgPool`
+
+    Given input of shape [batch_size, max_len, hidden_size], applies avg pooling over the last dimension. Output is [batch_size, hidden_size]
+    """

     def __init__(self, stride=None, padding=0):
         super(AvgPool, self).__init__()

@@ -111,10 +118,16 @@ class AvgPool(nn.Module):
         return x.squeeze(dim=-1)

-class MeanPoolWithMask(nn.Module):
+class AvgPoolWithMask(nn.Module):
+    """
+    Alias: :class:`fastNLP.modules.aggregator.AvgPoolWithMask` :class:`fastNLP.modules.aggregator.pooling.AvgPoolWithMask`
+
+    Given input of shape [batch_size, max_len, hidden_size], applies avg pooling over the last dimension. Output is [batch_size, hidden_size]; only
+    positions where the mask is 1 are considered during pooling
+    """

     def __init__(self):
-        super(MeanPoolWithMask, self).__init__()
+        super(AvgPoolWithMask, self).__init__()
         self.inf = 10e12

     def forward(self, tensor, mask, dim=1):
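And the masked average that AvgPoolWithMask documents, in the same sketch style (made-up tensors):

    import torch

    tensor = torch.randn(2, 5, 4)      # [batch, max_len, hidden]
    mask = torch.tensor([[1., 1., 1., 0., 0.],
                         [1., 1., 0., 0., 0.]])

    # only mask == 1 positions contribute to the average
    summed = (tensor * mask.unsqueeze(-1)).sum(dim=1)
    pooled = summed / mask.sum(dim=1, keepdim=True)   # [batch, hidden]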
@@ -6,6 +6,8 @@ from ..utils import initial_parameter
 def allowed_transitions(id2target, encoding_type='bio', include_start_end=True):
     """
+    Alias: :class:`fastNLP.modules.decoder.allowed_transitions` :class:`fastNLP.modules.decoder.CRF.allowed_transitions`
+
     Given an id-to-label mapping, returns the list of all allowed (from_tag_id, to_tag_id) transitions.

     :param dict id2target: keys are label indices, values are str tags or tag-labels. A value may be a bare tag such as "B" or "M", or
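A quick usage sketch, following the signature shown in this hunk; the toy tag map is an assumption:

    from fastNLP.modules.decoder.CRF import allowed_transitions

    id2target = {0: 'B-PER', 1: 'I-PER', 2: 'O'}
    # list of (from_tag_id, to_tag_id) pairs the BIO scheme permits
    pairs = allowed_transitions(id2target, encoding_type='bio',
                                include_start_end=True)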
@@ -133,7 +135,10 @@ def _is_transition_allowed(encoding_type, from_tag, from_label, to_tag, to_label
 class ConditionalRandomField(nn.Module):
-    """Conditional random field.
+    """
+    Alias: :class:`fastNLP.modules.decoder.ConditionalRandomField` :class:`fastNLP.modules.decoder.CRF.ConditionalRandomField`
+
+    Conditional random field.
     Provides two methods, forward() and viterbi_decode(), used for training and inference respectively.

     :param int num_tags: number of tags
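The two documented entry points, sketched under the assumption that forward takes (feats, tags, mask) and that viterbi_decode returns (paths, scores); exact signatures may differ by version:

    import torch
    from fastNLP.modules.decoder.CRF import ConditionalRandomField

    crf = ConditionalRandomField(num_tags=5)
    feats = torch.randn(2, 6, 5)               # [batch, max_len, num_tags]
    tags = torch.randint(0, 5, (2, 6))
    mask = torch.ones(2, 6)

    nll = crf(feats, tags, mask)                      # training: forward()
    paths, scores = crf.viterbi_decode(feats, mask)   # inference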
@@ -5,7 +5,10 @@ from ..utils import initial_parameter
 class MLP(nn.Module):
-    """Multilayer Perceptrons as a decoder
+    """
+    Alias: :class:`fastNLP.modules.MLP` :class:`fastNLP.modules.decoder.MLP.MLP`
+
+    Multilayer perceptron

     :param list size_layer: a list of ints defining the MLP layers; each entry is the hidden size of that layer. The MLP has len(size_layer) - 1 layers
     :param str or list activation:
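The size_layer convention in a hedged one-liner: [100, 64, 10] yields len(size_layer) - 1 = 2 layers (100 -> 64 -> 10); the activation keyword is assumed to default to a string name:

    import torch
    from fastNLP.modules import MLP

    mlp = MLP([100, 64, 10], activation='relu')
    y = mlp(torch.randn(4, 100))   # -> [4, 10]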
@@ -3,7 +3,10 @@ import torch
 def viterbi_decode(logits, transitions, mask=None, unpad=False):
-    """Given a feature matrix and a transition score matrix, computes the best path and the corresponding score
+    """
+    Alias: :class:`fastNLP.modules.decoder.viterbi_decode` :class:`fastNLP.modules.decoder.utils.viterbi_decode`
+
+    Given a feature matrix and a transition score matrix, computes the best path and the corresponding score

     :param torch.FloatTensor logits: batch_size x max_len x num_tags, the feature matrix.
     :param torch.FloatTensor transitions: n_tags x n_tags. The value at [i, j] is treated as the transition score from tag i to tag j.
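Hedged usage, following the documented shapes; the (paths, scores) return order is an assumption:

    import torch
    from fastNLP.modules.decoder.utils import viterbi_decode

    batch, max_len, n_tags = 2, 6, 5
    logits = torch.randn(batch, max_len, n_tags)
    transitions = torch.randn(n_tags, n_tags)  # [i, j]: score of tag i -> tag j
    paths, scores = viterbi_decode(logits, transitions)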
@@ -2,8 +2,11 @@ import torch
 __all__ = []

 class TimestepDropout(torch.nn.Dropout):
-    """This module accepts a ``[batch_size, num_timesteps, embedding_dim)]`` and use a single
-    dropout mask of shape ``(batch_size, embedding_dim)`` to apply on every time step.
+    """
+    Alias: :class:`fastNLP.modules.TimestepDropout`
+
+    Accepts input of shape ``[batch_size, num_timesteps, embedding_dim)]`` and applies dropout with a single mask
+    (of shape ``(batch_size, embedding_dim)``) at every timestep.
     """

     def forward(self, x):
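The mechanism the new docstring describes, written out directly (a sketch, not the class's code): one Bernoulli mask per (batch, feature) pair, shared across all timesteps:

    import torch

    x = torch.randn(2, 10, 8)      # [batch, num_timesteps, embedding_dim]
    p = 0.5
    mask = torch.bernoulli(torch.full((2, 1, 8), 1 - p)) / (1 - p)
    y = x * mask                   # identical mask at every timestep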
@@ -7,6 +7,8 @@ from ..utils import initial_parameter
 # from torch.nn.init import xavier_uniform
 class ConvolutionCharEncoder(nn.Module):
     """
+    Alias: :class:`fastNLP.modules.encoder.ConvolutionCharEncoder` :class:`fastNLP.modules.encoder.char_encoder.ConvolutionCharEncoder`
+
     Char-level convolutional encoder.

     :param int char_emb_size: dimension of the char-level embedding. Default: 50
         E.g. with 26 characters, each embedded as a 50-dimensional vector, the input vector dimension is 50.

@@ -9,10 +9,12 @@ from ..utils import initial_parameter
 class ConvMaxpool(nn.Module):
-    """A layer combining convolution and max-pooling.
-    Given input of shape batch_size x max_len x input_size, returns a matrix of size batch_size x sum(output_channels). Internally, the
-    input is first convolved by a CNN, passed through an activation layer, and then max-pooled over the length (max_len)
-    dimension, yielding one vector per sample.
+    """
+    Alias: :class:`fastNLP.modules.encoder.ConvMaxpool` :class:`fastNLP.modules.encoder.conv_maxpool.ConvMaxpool`
+
+    A layer combining convolution and max-pooling. Given input of shape batch_size x max_len x input_size, returns a matrix of
+    size batch_size x sum(output_channels). Internally, the input is first convolved by a CNN, passed through an activation
+    layer, and then max-pooled over the length (max_len) dimension, yielding one vector representation per sample.

     :param int in_channels: number of input channels, usually the embedding dimension, or the encoder output dimension
     :param int,tuple(int) out_channels: number of output channels. If a list, its length must match the number of kernel_sizes
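The reworded ConvMaxpool description, as a plain-PyTorch sketch (one assumed kernel size and channel count):

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    x = torch.randn(2, 20, 128)    # [batch, max_len, input_size]
    conv = nn.Conv1d(in_channels=128, out_channels=64, kernel_size=3, padding=1)
    feat = F.relu(conv(x.transpose(1, 2)))   # CNN + activation: [batch, 64, max_len]
    out = feat.max(dim=-1)[0]                # max over max_len: [batch, 64]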
@@ -2,7 +2,10 @@ import torch.nn as nn
 from ..utils import get_embeddings

 class Embedding(nn.Embedding):
-    """Embedding component. The vocabulary size is available as self.num_embeddings and the embedding dimension as self.embedding_dim"""
+    """
+    Alias: :class:`fastNLP.modules.Embedding` :class:`fastNLP.modules.encoder.embedding.Embedding`
+
+    Embedding component. The vocabulary size is available as self.num_embeddings and the embedding dimension as self.embedding_dim"""

     def __init__(self, init_embed, padding_idx=None, dropout=0.0, sparse=False, max_norm=None, norm_type=2,
                  scale_grad_by_freq=False):

@@ -9,7 +9,10 @@ from ..utils import initial_parameter
 class LSTM(nn.Module):
-    """LSTM module, a light wrapper around the PyTorch LSTM
+    """
+    Alias: :class:`fastNLP.modules.encoder.LSTM` :class:`fastNLP.modules.encoder.lstm.LSTM`
+
+    LSTM module, a light wrapper around the PyTorch LSTM

     :param input_size: feature dimension of the input `x`
     :param hidden_size: feature dimension of the hidden state `h`

@@ -8,6 +8,9 @@ import numpy as NP
 class StarTransformer(nn.Module):
     """
+    Alias: :class:`fastNLP.modules.encoder.StarTransformer` :class:`fastNLP.modules.encoder.star_transformer.StarTransformer`
+
     The encoder part of the Star-Transformer. Takes 3-d text input and returns a text encoding of the same length
     paper: https://arxiv.org/abs/1902.09113

@@ -5,7 +5,11 @@ from ..dropout import TimestepDropout
 class TransformerEncoder(nn.Module):
-    """The transformer encoder module, without the embedding layer
+    """
+    Alias: :class:`fastNLP.modules.encoder.TransformerEncoder` :class:`fastNLP.modules.encoder.transformer.TransformerEncoder`
+
+    The transformer encoder module, without the embedding layer

     :param int num_layers: number of transformer layers
     :param int model_size: the input dimension; also the output dimension.

@@ -197,7 +197,10 @@ class VarRNNBase(nn.Module):
         return output, hidden

 class VarLSTM(VarRNNBase):
-    """Variational Dropout LSTM.
+    """
+    Alias: :class:`fastNLP.modules.encoder.VarLSTM` :class:`fastNLP.modules.encoder.variational_rnn.VarLSTM`
+
+    Variational Dropout LSTM.

     :param input_size: feature dimension of the input `x`
     :param hidden_size: feature dimension of the hidden state `h`

@@ -218,7 +221,10 @@ class VarLSTM(VarRNNBase):
 class VarRNN(VarRNNBase):
-    """Variational Dropout RNN.
+    """
+    Alias: :class:`fastNLP.modules.encoder.VarRNN` :class:`fastNLP.modules.encoder.variational_rnn.VarRNN`
+
+    Variational Dropout RNN.

     :param input_size: feature dimension of the input `x`
     :param hidden_size: feature dimension of the hidden state `h`

@@ -238,7 +244,10 @@ class VarRNN(VarRNNBase):
         return super(VarRNN, self).forward(x, hx)

 class VarGRU(VarRNNBase):
-    """Variational Dropout GRU.
+    """
+    Alias: :class:`fastNLP.modules.encoder.VarGRU` :class:`fastNLP.modules.encoder.variational_rnn.VarGRU`
+
+    Variational Dropout GRU.

     :param input_size: feature dimension of the input `x`
     :param hidden_size: feature dimension of the hidden state `h`
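A usage sketch for the variational-dropout RNNs; the constructor arguments are taken from the smoke-test code deleted in the next hunk, with dropout values changed for illustration:

    import torch
    from fastNLP.modules.encoder.variational_rnn import VarLSTM

    lstm = VarLSTM(input_size=1, num_layers=2, hidden_size=2,
                   batch_first=True, bidirectional=True,
                   input_dropout=0.3, hidden_dropout=0.3)
    out, hidden = lstm(torch.randn(3, 5, 1))   # [batch, len, input_size]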
@@ -257,35 +266,3 @@ class VarGRU(VarRNNBase):
     def forward(self, x, hx=None):
         return super(VarGRU, self).forward(x, hx)
-
-# if __name__ == '__main__':
-#     x = torch.Tensor([[1,2,3], [4,5,0], [6,0,0]])[:,:,None] * 0.1
-#     mask = (x != 0).float().view(3, -1)
-#     seq_lens = torch.LongTensor([3,2,1])
-#     y = torch.Tensor([[0,1,1], [1,1,0], [0,0,0]])
-#     # rev = _reverse_packed_sequence(pack)
-#     # # print(rev)
-#     lstm = VarLSTM(input_size=1, num_layers=2, hidden_size=2,
-#                    batch_first=True, bidirectional=True,
-#                    input_dropout=0.0, hidden_dropout=0.0,)
-#     # lstm = nn.LSTM(input_size=1, num_layers=2, hidden_size=2,
-#     #                batch_first=True, bidirectional=True,)
-#     loss = nn.BCELoss()
-#     m = nn.Sigmoid()
-#     optim = torch.optim.SGD(lstm.parameters(), lr=1e-3)
-#     for i in range(2000):
-#         optim.zero_grad()
-#         pack = pack_padded_sequence(x, seq_lens, batch_first=True)
-#         out, hidden = lstm(pack)
-#         out, lens = pad_packed_sequence(out, batch_first=True)
-#         # print(lens)
-#         # print(out)
-#         # print(hidden[0])
-#         # print(hidden[0].size())
-#         # print(hidden[1])
-#         out = out.sum(-1)
-#         out = m(out) * mask
-#         l = loss(out, y)
-#         l.backward()
-#         optim.step()
-#         if i % 50 == 0:
-#             print(out)
@@ -70,10 +70,7 @@ def initial_parameter(net, initial_method=None):
 def get_embeddings(init_embed):
     """
-    Get the word embedding
-
-    .. todo::
-        fill in the documentation
+    Builds an nn.Embedding object from the given init_embed.

     :param init_embed: the word embedding; may be a tuple of (num_embedings, embedding_dim), i.e.
         the size of the embedding and the dimension of each word. An nn.Embedding object may also be passed,
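A hedged sketch of the dispatch get_embeddings performs, based on the accepted types documented here and in the init_embed parameter docs above; the ndarray case is an assumption carried over from those docs:

    import numpy as np
    import torch.nn as nn
    from fastNLP.modules.utils import get_embeddings

    emb1 = get_embeddings((10000, 100))   # (num_embeddings, embedding_dim) tuple
    emb2 = get_embeddings(nn.Embedding(10000, 100))   # passed through
    emb3 = get_embeddings(np.random.rand(10000, 100).astype('float32'))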