@@ -1,7 +0,0 @@
-fastNLP.io.config\_io module
-============================
-
-.. automodule:: fastNLP.io.config_io
-    :members:
-    :undoc-members:
-    :show-inheritance:
@@ -1,7 +0,0 @@
-fastNLP.io.file\_reader module
-==============================
-
-.. automodule:: fastNLP.io.file_reader
-    :members:
-    :undoc-members:
-    :show-inheritance:
@@ -12,9 +12,7 @@ Submodules
 .. toctree::
 
    fastNLP.io.base_loader
-   fastNLP.io.config_io
    fastNLP.io.dataset_loader
    fastNLP.io.embed_loader
-   fastNLP.io.file_reader
    fastNLP.io.model_io
@@ -9,7 +9,6 @@ fastNLP requires the following packages::
 
    torch>=0.4.0
    numpy
-   tensorboardX
    tqdm
    nltk
@@ -18,4 +17,4 @@ fastNLP requires the following packages::
 .. code:: shell
 
-   >>> pip install fitlog
    >>> pip install fastNLP
@@ -5,16 +5,13 @@
 2. The :doc:`DataSetLoader <fastNLP.io.dataset_loader>` classes used to load data
-3. Classes used to read and write config files, see :doc:`Config-IO <fastNLP.io.config_io>`
-4. Classes used to save and load models, see :doc:`Model-IO <fastNLP.io.model_io>`
+3. Classes used to save and load models, see :doc:`Model-IO <fastNLP.io.model_io>`
 
 How to use these classes can be found in the documentation of the corresponding modules.
 """
 from .embed_loader import EmbedLoader
 from .dataset_loader import DataSetLoader, CSVLoader, JsonLoader, ConllLoader, SNLILoader, SSTLoader, \
     PeopleDailyCorpusLoader, Conll2003Loader
-from .config_io import ConfigLoader, ConfigSection, ConfigSaver
 from .model_io import ModelLoader as ModelLoader, ModelSaver as ModelSaver
 
 __all__ = [
@@ -29,10 +26,6 @@ __all__ = [
     'PeopleDailyCorpusLoader',
     'Conll2003Loader',
 
-    'ConfigLoader',
-    'ConfigSection',
-    'ConfigSaver',
-
     'ModelLoader',
     'ModelSaver',
 ]
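For orientation, the loaders that remain exported here can be used straight from fastNLP.io; a minimal sketch, with a hypothetical tab-separated file path and column names (not part of the patch):

    from fastNLP.io import CSVLoader

    # hypothetical path and headers; replace with your own data
    loader = CSVLoader(sep='\t', headers=['words', 'label'])
    data_set = loader.load('data/train.tsv')
    print(len(data_set), data_set[0])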
@@ -5,7 +5,6 @@ TODO: a table with detailed descriptions, matching the home page
 """
 from .base_model import BaseModel
 from .biaffine_parser import BiaffineParser, GraphParser
-from .char_language_model import CharLM
 from .cnn_text_classification import CNNText
 from .sequence_modeling import SeqLabeling, AdvSeqLabel
 from .snli import ESIM
@@ -1,138 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-from ..modules.encoder.lstm import LSTM
-
-
-class Highway(nn.Module):
-    """Highway network"""
-
-    def __init__(self, input_size):
-        super(Highway, self).__init__()
-        self.fc1 = nn.Linear(input_size, input_size, bias=True)
-        self.fc2 = nn.Linear(input_size, input_size, bias=True)
-
-    def forward(self, x):
-        t = F.sigmoid(self.fc1(x))
-        return torch.mul(t, F.relu(self.fc2(x))) + torch.mul(1 - t, x)
-
-
-class CharLM(nn.Module):
-    """CNN + highway network + LSTM
-
-    # Input::
-        4D tensor with shape [batch_size, in_channel, height, width]
-
-    # Output::
-        2D Tensor with shape [batch_size, vocab_size]
-
-    # Arguments::
-        char_emb_dim: the size of each character's attention
-        word_emb_dim: the size of each word's attention
-        vocab_size: num of unique words
-        num_char: num of characters
-        use_gpu: True or False
-    """
-
-    def __init__(self, char_emb_dim, word_emb_dim,
-                 vocab_size, num_char):
-        super(CharLM, self).__init__()
-        self.char_emb_dim = char_emb_dim
-        self.word_emb_dim = word_emb_dim
-        self.vocab_size = vocab_size
-
-        # char attention layer
-        self.char_embed = nn.Embedding(num_char, char_emb_dim)
-
-        # convolutions of filters with different sizes
-        self.convolutions = []
-
-        # list of tuples: (the number of filter, width)
-        self.filter_num_width = [(25, 1), (50, 2), (75, 3), (100, 4), (125, 5), (150, 6)]
-
-        for out_channel, filter_width in self.filter_num_width:
-            self.convolutions.append(
-                nn.Conv2d(
-                    1,  # in_channel
-                    out_channel,  # out_channel
-                    kernel_size=(char_emb_dim, filter_width),  # (height, width)
-                    bias=True
-                )
-            )
-
-        self.highway_input_dim = sum([x for x, y in self.filter_num_width])
-
-        self.batch_norm = nn.BatchNorm1d(self.highway_input_dim, affine=False)
-
-        # highway net
-        self.highway1 = Highway(self.highway_input_dim)
-        self.highway2 = Highway(self.highway_input_dim)
-
-        # LSTM
-        self.lstm_num_layers = 2
-        self.lstm = LSTM(self.highway_input_dim, hidden_size=self.word_emb_dim, num_layers=self.lstm_num_layers,
-                         dropout=0.5)
-
-        # output layer
-        self.dropout = nn.Dropout(p=0.5)
-        self.linear = nn.Linear(self.word_emb_dim, self.vocab_size)
-
-    def forward(self, x):
-        # Input: Variable of Tensor with shape [num_seq, seq_len, max_word_len+2]
-        # Return: Variable of Tensor with shape [num_words, len(word_dict)]
-        lstm_batch_size = x.size()[0]
-        lstm_seq_len = x.size()[1]
-
-        x = x.contiguous().view(-1, x.size()[2])
-        # [num_seq*seq_len, max_word_len+2]
-
-        x = self.char_embed(x)
-        # [num_seq*seq_len, max_word_len+2, char_emb_dim]
-
-        x = torch.transpose(x.view(x.size()[0], 1, x.size()[1], -1), 2, 3)
-        # [num_seq*seq_len, 1, max_word_len+2, char_emb_dim]
-
-        x = self.conv_layers(x)
-        # [num_seq*seq_len, total_num_filters]
-
-        x = self.batch_norm(x)
-        # [num_seq*seq_len, total_num_filters]
-
-        x = self.highway1(x)
-        x = self.highway2(x)
-        # [num_seq*seq_len, total_num_filters]
-
-        x = x.contiguous().view(lstm_batch_size, lstm_seq_len, -1)
-        # [num_seq, seq_len, total_num_filters]
-
-        x = self.lstm(x)
-        # [seq_len, num_seq, hidden_size]
-
-        x = self.dropout(x)
-        # [seq_len, num_seq, hidden_size]
-
-        x = x.contiguous().view(lstm_batch_size * lstm_seq_len, -1)
-        # [num_seq*seq_len, hidden_size]
-
-        x = self.linear(x)
-        # [num_seq*seq_len, vocab_size]
-        return x
-
-    def conv_layers(self, x):
-        chosen_list = list()
-        for conv in self.convolutions:
-            feature_map = F.tanh(conv(x))
-            # (batch_size, out_channel, 1, max_word_len-width+1)
-            chosen = torch.max(feature_map, 3)[0]
-            # (batch_size, out_channel, 1)
-            chosen = chosen.squeeze()
-            # (batch_size, out_channel)
-            chosen_list.append(chosen)
-
-        # (batch_size, total_num_filers)
-        return torch.cat(chosen_list, 1)
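The deleted CharLM combined character convolutions with a highway layer before the word-level LSTM. Purely as a reference for the highway step, a self-contained sketch of the same gating formula, t * relu(fc2(x)) + (1 - t) * x (sizes below are made up, not taken from the patch):

    import torch
    import torch.nn as nn

    class Highway(nn.Module):
        def __init__(self, input_size):
            super().__init__()
            self.fc1 = nn.Linear(input_size, input_size)   # gate
            self.fc2 = nn.Linear(input_size, input_size)   # transform

        def forward(self, x):
            t = torch.sigmoid(self.fc1(x))                 # gate values in (0, 1)
            return t * torch.relu(self.fc2(x)) + (1 - t) * x

    x = torch.randn(4, 100)          # hypothetical batch of 4 with 100 features
    print(Highway(100)(x).shape)     # torch.Size([4, 100]) -- shape is preserved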
@@ -12,19 +12,21 @@ my_inf = 10e12
 
 
 class ESIM(BaseModel):
-    """A PyTorch implementation of the ESIM model.
+    """
+    A PyTorch implementation of the ESIM model.
+
     ESIM paper: Enhanced LSTM for Natural Language Inference (arXiv: 1609.06038)
+
+    :param int vocab_size: vocabulary size
+    :param int embed_dim: dimension of the word embeddings
+    :param int hidden_size: hidden size of the LSTM
+    :param float dropout: dropout rate, 0 by default
+    :param int num_classes: number of target classes, 3 by default
+    :param numpy.array init_embedding: initial embedding matrix of shape (vocab_size, embed_dim); None by default, i.e. the embeddings are randomly initialized
     """
 
     def __init__(self, vocab_size, embed_dim, hidden_size, dropout=0.0, num_classes=3, init_embedding=None):
-        """
-        :param int vocab_size: vocabulary size
-        :param int embed_dim: dimension of the word embeddings
-        :param int hidden_size: hidden size of the LSTM
-        :param float dropout: dropout rate, 0 by default
-        :param int num_classes: number of target classes, 3 by default
-        :param numpy.array init_embedding: initial embedding matrix of shape (vocab_size, embed_dim); None by default, i.e. the embeddings are randomly initialized
-        """
         super(ESIM, self).__init__()
         self.vocab_size = vocab_size
         self.embed_dim = embed_dim
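Based on the constructor signature and the parameters documented above, a hedged construction sketch (all sizes and the embedding matrix below are made up for illustration):

    import numpy as np
    from fastNLP.models import ESIM   # exported via fastNLP/models/__init__.py above

    vocab_size, embed_dim = 10000, 100                        # hypothetical sizes
    init_embedding = np.random.rand(vocab_size, embed_dim)    # or None for random initialization

    model = ESIM(vocab_size=vocab_size, embed_dim=embed_dim, hidden_size=128,
                 dropout=0.3, num_classes=3, init_embedding=init_embedding)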
@@ -12,8 +12,8 @@ from . import decoder
 from . import encoder
 from .aggregator import *
 from .decoder import *
-from .other_modules import *
 from .dropout import TimestepDropout
 from .encoder import *
+from .utils import get_embeddings
 
 __version__ = '0.0.0'
@@ -1,11 +1,7 @@
-__all__ = ["MaxPool", "MaxPoolWithMask", "AvgPool", "MeanPoolWithMask", "KMaxPool", "Attention", "BiAttention",
-           "SelfAttention"]
+__all__ = ["MaxPool", "MaxPoolWithMask", "AvgPool", "MultiHeadAttention"]
 
 from .pooling import MaxPool
 from .pooling import MaxPoolWithMask
 from .pooling import AvgPool
 from .pooling import MeanPoolWithMask
-from .pooling import KMaxPool
 
-from .attention import Attention
-from .attention import BiAttention
-from .attention import SelfAttention
+from .attention import MultiHeadAttention
@@ -1,3 +1,4 @@
+__all__ =["MultiHeadAttention"]
 import math
 
 import torch
@@ -5,27 +6,14 @@ import torch.nn.functional as F
 from torch import nn
 
 from ..dropout import TimestepDropout
-from ..utils import mask_softmax
 from ..utils import initial_parameter
 
 
-class Attention(torch.nn.Module):
-    def __init__(self, normalize=False):
-        super(Attention, self).__init__()
-        self.normalize = normalize
-
-    def forward(self, query, memory, mask):
-        similarities = self._atten_forward(query, memory)
-        if self.normalize:
-            return mask_softmax(similarities, mask)
-        return similarities
-
-    def _atten_forward(self, query, memory):
-        raise NotImplementedError
-
-
 class DotAttention(nn.Module):
+    """
+    TODO
+    """
     def __init__(self, key_size, value_size, dropout=0.1):
         super(DotAttention, self).__init__()
         self.key_size = key_size
@@ -51,15 +39,15 @@ class DotAttention(nn.Module):
 
 
 class MultiHeadAttention(nn.Module):
-    def __init__(self, input_size, key_size, value_size, num_head, dropout=0.1):
-        """
+    """
+    :param input_size: int, the size of the input dimension, which is also the output dimension.
+    :param key_size: int, the dimension of each head.
+    :param value_size: int, the dimension of the value in each head.
+    :param num_head: int, the number of heads.
+    :param dropout: float.
+    """
 
-        :param input_size: int, the size of the input dimension, which is also the output dimension.
-        :param key_size: int, the dimension of each head.
-        :param value_size: int, the dimension of the value in each head.
-        :param num_head: int, the number of heads.
-        :param dropout: float.
-        """
+    def __init__(self, input_size, key_size, value_size, num_head, dropout=0.1):
         super(MultiHeadAttention, self).__init__()
         self.input_size = input_size
         self.key_size = key_size
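Given the documented parameters, a construction sketch (the sizes are invented; a 256-dimensional input split over 4 heads):

    from fastNLP.modules.aggregator import MultiHeadAttention  # export added in the aggregator __init__ change above

    attn = MultiHeadAttention(input_size=256, key_size=64, value_size=64,
                              num_head=4, dropout=0.1)
    # the input and output share the same dimension, input_size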
@@ -112,16 +100,16 @@ class MultiHeadAttention(nn.Module):
 
 
 class BiAttention(nn.Module):
-    """Bi Attention module
+    r"""Bi Attention module
 
     Calculate Bi Attention matrix `e`
 
     .. math::
 
-        \\begin{array}{ll} \\\\
-            e_ij = {a}^{\\mathbf{T}}_{i}{b}_{j} \\\\
+        \begin{array}{ll} \\
+            e_ij = {a}^{\mathbf{T}}_{i}{b}_{j} \\
             a_i =
             b_j =
-        \\end{array}
+        \end{array}
 
     """
@@ -171,8 +159,11 @@ class BiAttention(nn.Module):
 
         return out_x1, out_x2
 
 
 class SelfAttention(nn.Module):
-    """Self Attention Module.
+    """
+    Self Attention Module.
+
     :param int input_size: the hidden dimension of the input tensor
     :param int attention_unit: the hidden dimension of the output tensor
     :param int attention_hops:
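The matrix e in the BiAttention docstring above is a batched dot product between the two sequences' states; a standalone illustration with made-up shapes (not fastNLP code):

    import torch

    a = torch.randn(2, 5, 64)              # [batch, len_a, hidden]
    b = torch.randn(2, 7, 64)              # [batch, len_b, hidden]
    e = torch.bmm(a, b.transpose(1, 2))    # e[n, i, j] = a_i . b_j, shape [2, 5, 7]
    print(e.shape)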
@@ -1,21 +1,23 @@
+__all__ = ["MaxPool", "MaxPoolWithMask", "AvgPool"]
 import torch
 import torch.nn as nn
 
 
 class MaxPool(nn.Module):
-    """Max-pooling module."""
+    """
+    Max-pooling module.
+
+    :param stride: the stride of the pooling window; defaults to kernel_size
+    :param padding: the padding added to the input; defaults to 0
+    :param dilation: controls the spacing between elements inside the window
+    :param dimension: the dimensionality of the MaxPool; 1, 2 and 3 dimensions are supported.
+    :param kernel_size: the window size of the max pooling; defaults to the last k dimensions of the tensor, where k is dimension
+    :param return_indices:
+    :param ceil_mode:
+    """
 
     def __init__(self, stride=None, padding=0, dilation=1, dimension=1, kernel_size=None,
                  return_indices=False, ceil_mode=False):
-        """
-        :param stride: the stride of the pooling window; defaults to kernel_size
-        :param padding: the padding added to the input; defaults to 0
-        :param dilation: controls the spacing between elements inside the window
-        :param dimension: the dimensionality of the MaxPool; 1, 2 and 3 dimensions are supported.
-        :param kernel_size: the window size of the max pooling; defaults to the last k dimensions of the tensor, where k is dimension
-        :param return_indices:
-        :param ceil_mode:
-        """
         super(MaxPool, self).__init__()
         assert (1 <= dimension) and (dimension <= 3)
         self.dimension = dimension
@@ -110,6 +112,7 @@ class AvgPool(nn.Module):
 
 
 class MeanPoolWithMask(nn.Module):
     def __init__(self):
         super(MeanPoolWithMask, self).__init__()
         self.inf = 10e12
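A small usage sketch for the pooling module documented above (the input shape is an illustrative assumption, not taken from the patch):

    import torch
    from fastNLP.modules.aggregator import MaxPool   # export shown in the aggregator __init__ above

    x = torch.randn(4, 10, 32)        # hypothetical [batch, seq_len, hidden]
    pool = MaxPool(dimension=1)       # 1-D pooling; kernel_size defaults to the last dimension
    y = pool(x)                       # max over the pooled window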
@@ -1,3 +1,4 @@
-__all__ = ["MLP", "ConditionalRandomField"]
+__all__ = ["MLP", "ConditionalRandomField","viterbi_decode"]
 from .CRF import ConditionalRandomField
 from .MLP import MLP
+from .utils import viterbi_decode
@@ -1,4 +1,4 @@
+__all__ = ["viterbi_decode"]
 import torch
@@ -1,5 +1,5 @@
 import torch
 
+__all__ = []
 
 class TimestepDropout(torch.nn.Dropout):
     """This module accepts a ``[batch_size, num_timesteps, embedding_dim)]`` and use a single
@@ -1,11 +1,9 @@
 from .conv_maxpool import ConvMaxpool
 from .embedding import Embedding
-from .linear import Linear
 from .lstm import LSTM
 from .bert import BertModel
 
 __all__ = ["LSTM",
            "Embedding",
-           "Linear",
            "ConvMaxpool",
            "BertModel"]
@@ -6,16 +6,15 @@ from ..utils import initial_parameter
 # from torch.nn.init import xavier_uniform
 
 
 class ConvolutionCharEncoder(nn.Module):
-    """A char-level convolutional encoder."""
+    """
+    A char-level convolutional encoder.
+
+    :param int char_emb_size: the dimension of the char-level embedding. Default: 50
+        e.g. with 26 characters, each embedded into a 50-dimensional vector, the input vector dimension is 50.
+    :param tuple feature_maps: a tuple of ints. Its length is the number of char-level convolutions; the `i`-th int is the number of filters of the `i`-th convolution.
+    :param tuple kernels: a tuple of ints. Its length is the number of char-level convolutions; the `i`-th int is the kernel size of the `i`-th convolution.
+    :param initial_method: the parameter initialization method; defaults to `xavier normal`
+    """
 
     def __init__(self, char_emb_size=50, feature_maps=(40, 30, 30), kernels=(3, 4, 5), initial_method=None):
-        """
-        :param int char_emb_size: the dimension of the char-level embedding. Default: 50
-            e.g. with 26 characters, each embedded into a 50-dimensional vector, the input vector dimension is 50.
-        :param tuple feature_maps: a tuple of ints. Its length is the number of char-level convolutions; the `i`-th int is the number of filters of the `i`-th convolution.
-        :param tuple kernels: a tuple of ints. Its length is the number of char-level convolutions; the `i`-th int is the kernel size of the `i`-th convolution.
-        :param initial_method: the parameter initialization method; defaults to `xavier normal`
-        """
         super(ConvolutionCharEncoder, self).__init__()
         self.convs = nn.ModuleList([
             nn.Conv2d(1, feature_maps[i], kernel_size=(char_emb_size, kernels[i]), bias=True, padding=(0, 4))
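A hedged construction sketch using the defaults documented above (the import path is assumed from the repository layout and may differ):

    from fastNLP.modules.encoder.char_encoder import ConvolutionCharEncoder  # assumed module path

    # 50-dim char embeddings, three convolutions with 40/30/30 filters and kernel sizes 3/4/5
    encoder = ConvolutionCharEncoder(char_emb_size=50,
                                     feature_maps=(40, 30, 30),
                                     kernels=(3, 4, 5))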
@@ -1,21 +0,0 @@
-import torch.nn as nn
-
-from ..utils import initial_parameter
-
-
-class Linear(nn.Module):
-    """
-
-    :param int input_size: input size
-    :param int output_size: output size
-    :param bool bias:
-    :param str initial_method:
-    """
-
-    def __init__(self, input_size, output_size, bias=True, initial_method=None):
-        super(Linear, self).__init__()
-        self.linear = nn.Linear(input_size, output_size, bias)
-        initial_parameter(self, initial_method)
-
-    def forward(self, x):
-        x = self.linear(x)
-        return x
@@ -19,15 +19,13 @@ class LSTM(nn.Module):
     :param batch_first: if ``True``, the input and output ``Tensor`` shapes are
         :(batch, seq, feature). Default: ``False``
     :param bias: if ``False``, the model will not use a bias. Default: ``True``
-    :param get_hidden: whether to also return the hidden state `h`. Default: ``False``
     """
 
     def __init__(self, input_size, hidden_size=100, num_layers=1, dropout=0.0, batch_first=True,
-                 bidirectional=False, bias=True, initial_method=None, get_hidden=False):
+                 bidirectional=False, bias=True, initial_method=None):
         super(LSTM, self).__init__()
         self.batch_first = batch_first
         self.lstm = nn.LSTM(input_size, hidden_size, num_layers, bias=bias, batch_first=batch_first,
                             dropout=dropout, bidirectional=bidirectional)
-        self.get_hidden = get_hidden
         initial_parameter(self, initial_method)
 
     def forward(self, x, seq_len=None, h0=None, c0=None):
@@ -39,7 +37,6 @@ class LSTM(nn.Module):
         :param c0: [batch, hidden_size] initial cell state; if ``None``, an all-one vector is used. Default: ``None``
         :return (output, ht) or output: if ``get_hidden=True``, the output sequence [batch, seq_len, hidden_size*num_direction]
             and the hidden state at the last time step [batch, hidden_size*num_direction].
-            If ``get_hidden=False``, only the output sequence is returned.
         """
         if h0 is not None and c0 is not None:
             hx = (h0, c0)
@@ -61,16 +58,4 @@ class LSTM(nn.Module):
             output = output[:, unsort_idx]
         else:
             output, hx = self.lstm(x, hx)
-        if self.get_hidden:
-            return output, hx
-        return output
-
-
-if __name__ == "__main__":
-    lstm = LSTM(input_size=2, hidden_size=2, get_hidden=False)
-    x = torch.randn((3, 5, 2))
-    seq_lens = torch.tensor([5,1,2])
-    y = lstm(x, seq_lens)
-    print(x)
-    print(y)
-    print(x.size(), y.size(), )
+        return output, hx
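After this change the encoder always returns both the output sequence and the final hidden state; a usage sketch mirroring the deleted __main__ block (sizes are arbitrary):

    import torch
    from fastNLP.modules.encoder.lstm import LSTM

    lstm = LSTM(input_size=2, hidden_size=2)
    x = torch.randn(3, 5, 2)              # [batch, seq_len, input_size]
    seq_len = torch.tensor([5, 1, 2])
    output, (h, c) = lstm(x, seq_len)     # forward now always returns (output, hx)
    print(output.size())                  # [batch, seq_len, hidden_size]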
@@ -1,186 +0,0 @@
-import numpy as np
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-import torch.utils.data
-from torch.nn import Parameter
-
-
-class GroupNorm(nn.Module):
-    def __init__(self, num_features, num_groups=20, eps=1e-5):
-        super(GroupNorm, self).__init__()
-        self.weight = nn.Parameter(torch.ones(1, num_features, 1))
-        self.bias = nn.Parameter(torch.zeros(1, num_features, 1))
-        self.num_groups = num_groups
-        self.eps = eps
-
-    def forward(self, x):
-        N, C, H = x.size()
-        G = self.num_groups
-        assert C % G == 0
-
-        x = x.view(N, G, -1)
-        mean = x.mean(-1, keepdim=True)
-        var = x.var(-1, keepdim=True)
-        x = (x - mean) / (var + self.eps).sqrt()
-
-        x = x.view(N, C, H)
-        return x * self.weight + self.bias
-
-
-class LayerNormalization(nn.Module):
-    """
-
-    :param int layer_size:
-    :param float eps: default=1e-3
-    """
-
-    def __init__(self, layer_size, eps=1e-3):
-        super(LayerNormalization, self).__init__()
-        self.eps = eps
-        self.a_2 = nn.Parameter(torch.ones(1, layer_size, requires_grad=True))
-        self.b_2 = nn.Parameter(torch.zeros(1, layer_size, requires_grad=True))
-
-    def forward(self, z):
-        if z.size(1) == 1:
-            return z
-        mu = torch.mean(z, keepdim=True, dim=-1)
-        sigma = torch.std(z, keepdim=True, dim=-1)
-        ln_out = (z - mu) / (sigma + self.eps)
-        ln_out = ln_out * self.a_2 + self.b_2
-        return ln_out
-
-
-class BiLinear(nn.Module):
-    def __init__(self, n_left, n_right, n_out, bias=True):
-        """
-
-        :param int n_left: size of left input
-        :param int n_right: size of right input
-        :param int n_out: size of output
-        :param bool bias: If set to False, the layer will not learn an additive bias. Default: True
-        """
-        super(BiLinear, self).__init__()
-        self.n_left = n_left
-        self.n_right = n_right
-        self.n_out = n_out
-
-        self.U = Parameter(torch.Tensor(self.n_out, self.n_left, self.n_right))
-        self.W_l = Parameter(torch.Tensor(self.n_out, self.n_left))
-        self.W_r = Parameter(torch.Tensor(self.n_out, self.n_left))
-
-        if bias:
-            self.bias = Parameter(torch.Tensor(n_out))
-        else:
-            self.register_parameter('bias', None)
-
-        self.reset_parameters()
-
-    def reset_parameters(self):
-        nn.init.xavier_uniform_(self.W_l)
-        nn.init.xavier_uniform_(self.W_r)
-        nn.init.constant_(self.bias, 0.)
-        nn.init.xavier_uniform_(self.U)
-
-    def forward(self, input_left, input_right):
-        """
-
-        :param Tensor input_left: the left input tensor with shape = [batch1, batch2, ..., left_features]
-        :param Tensor input_right: the right input tensor with shape = [batch1, batch2, ..., right_features]
-        """
-        left_size = input_left.size()
-        right_size = input_right.size()
-        assert left_size[:-1] == right_size[:-1], \
-            "batch size of left and right inputs mis-match: (%s, %s)" % (left_size[:-1], right_size[:-1])
-        batch = int(np.prod(left_size[:-1]))
-
-        # convert left and right input to matrices [batch, left_features], [batch, right_features]
-        input_left = input_left.view(batch, self.n_left)
-        input_right = input_right.view(batch, self.n_right)
-
-        # output [batch, out_features]
-        output = F.bilinear(input_left, input_right, self.U, self.bias)
-        output = output + \
-                 F.linear(input_left, self.W_l, None) + \
-                 F.linear(input_right, self.W_r, None)
-        # convert back to [batch1, batch2, ..., out_features]
-        return output.view(left_size[:-1] + (self.n_out,))
-
-    def __repr__(self):
-        return self.__class__.__name__ + ' (' \
-               + 'in1_features=' + str(self.n_left) \
-               + ', in2_features=' + str(self.n_right) \
-               + ', out_features=' + str(self.n_out) + ')'
-
-
-class BiAffine(nn.Module):
-    def __init__(self, n_enc, n_dec, n_labels, biaffine=True, **kwargs):
-        """
-
-        :param int n_enc: the dimension of the encoder input.
-        :param int n_dec: the dimension of the decoder input.
-        :param int n_labels: the number of labels of the crf layer
-        :param bool biaffine: if apply bi-affine parameter.
-        """
-        super(BiAffine, self).__init__()
-        self.n_enc = n_enc
-        self.n_dec = n_dec
-        self.num_labels = n_labels
-        self.biaffine = biaffine
-
-        self.W_d = Parameter(torch.Tensor(self.num_labels, self.n_dec))
-        self.W_e = Parameter(torch.Tensor(self.num_labels, self.n_enc))
-        self.b = Parameter(torch.Tensor(self.num_labels, 1, 1))
-        if self.biaffine:
-            self.U = Parameter(torch.Tensor(self.num_labels, self.n_dec, self.n_enc))
-        else:
-            self.register_parameter('U', None)
-
-        self.reset_parameters()
-
-    def reset_parameters(self):
-        nn.init.xavier_uniform_(self.W_d)
-        nn.init.xavier_uniform_(self.W_e)
-        nn.init.constant_(self.b, 0.)
-        if self.biaffine:
-            nn.init.xavier_uniform_(self.U)
-
-    def forward(self, input_d, input_e, mask_d=None, mask_e=None):
-        """
-
-        :param Tensor input_d: the decoder input tensor with shape = [batch, length_decoder, input_size]
-        :param Tensor input_e: the child input tensor with shape = [batch, length_encoder, input_size]
-        :param mask_d: Tensor or None, the mask tensor for decoder with shape = [batch, length_decoder]
-        :param mask_e: Tensor or None, the mask tensor for encoder with shape = [batch, length_encoder]
-        :returns: Tensor, the energy tensor with shape = [batch, num_label, length, length]
-        """
-        assert input_d.size(0) == input_e.size(0), 'batch sizes of encoder and decoder are requires to be equal.'
-        batch, length_decoder, _ = input_d.size()
-        _, length_encoder, _ = input_e.size()
-
-        # compute decoder part: [num_label, input_size_decoder] * [batch, input_size_decoder, length_decoder]
-        # the output shape is [batch, num_label, length_decoder]
-        out_d = torch.matmul(self.W_d, input_d.transpose(1, 2)).unsqueeze(3)
-        # compute decoder part: [num_label, input_size_encoder] * [batch, input_size_encoder, length_encoder]
-        # the output shape is [batch, num_label, length_encoder]
-        out_e = torch.matmul(self.W_e, input_e.transpose(1, 2)).unsqueeze(2)
-
-        # output shape [batch, num_label, length_decoder, length_encoder]
-        if self.biaffine:
-            # compute bi-affine part
-            # [batch, 1, length_decoder, input_size_decoder] * [num_labels, input_size_decoder, input_size_encoder]
-            # output shape [batch, num_label, length_decoder, input_size_encoder]
-            output = torch.matmul(input_d.unsqueeze(1), self.U)
-            # [batch, num_label, length_decoder, input_size_encoder] * [batch, 1, input_size_encoder, length_encoder]
-            # output shape [batch, num_label, length_decoder, length_encoder]
-            output = torch.matmul(output, input_e.unsqueeze(1).transpose(2, 3))
-            output = output + out_d + out_e + self.b
-        else:
-            output = out_d + out_d + self.b
-
-        if mask_d is not None:
-            output = output * mask_d.unsqueeze(1).unsqueeze(3) * mask_e.unsqueeze(1).unsqueeze(2)
-        return output
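For reference, the bi-affine energy assembled in the deleted BiAffine.forward is, per label l, score[b, l, i, j] = d_i^T U_l e_j plus two linear terms and a bias; a standalone sketch of the bilinear part with made-up sizes (not fastNLP code):

    import torch

    batch, len_d, len_e, dim, n_labels = 2, 4, 5, 8, 3
    d = torch.randn(batch, len_d, dim)     # decoder states
    e = torch.randn(batch, len_e, dim)     # encoder states
    U = torch.randn(n_labels, dim, dim)

    # score[b, l, i, j] = d[b, i] @ U[l] @ e[b, j]
    score = torch.einsum('bid,ldk,bjk->blij', d, U, e)
    print(score.shape)                     # torch.Size([2, 3, 4, 5])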
@@ -4,14 +4,6 @@ import torch.nn as nn
 import torch.nn.init as init
 
 
-def mask_softmax(matrix, mask):
-    if mask is None:
-        result = torch.nn.functional.softmax(matrix, dim=-1)
-    else:
-        raise NotImplementedError
-    return result
-
-
 def initial_parameter(net, initial_method=None):
     """A method used to initialize the weights of PyTorch models.
 
@@ -77,7 +69,8 @@ def initial_parameter(net, initial_method=None):
 
 
 def seq_mask(seq_len, max_len):
-    """Create sequence mask.
+    """
+    Create sequence mask.
 
     :param seq_len: list or torch.Tensor, the lengths of sequences in a batch.
     :param max_len: int, the maximum sequence length in a batch.
@@ -92,7 +85,8 @@ def seq_mask(seq_len, max_len):
 
 
 def get_embeddings(init_embed):
-    """Get the word embeddings
+    """
+    Get the word embeddings TODO
 
     :param init_embed: the word embeddings; can be a tuple of (num_embeddings, embedding_dim), i.e.
         the size of the embedding table and the dimension of each word vector; an nn.Embedding object can also be passed in,
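Per the parameter description above, init_embed may be a (num_embeddings, embedding_dim) tuple or an existing nn.Embedding; a small sketch (sizes are made up):

    import torch.nn as nn
    from fastNLP.modules import get_embeddings   # re-exported in the modules __init__ change above

    embed_a = get_embeddings((1000, 50))               # build from sizes, randomly initialized
    embed_b = get_embeddings(nn.Embedding(1000, 50))   # or pass an nn.Embedding through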
@@ -1,5 +1,4 @@
 numpy
 torch>=0.4.0
-tensorboardX
 tqdm
 nltk
@@ -1,112 +1,112 @@
 import os
 import unittest
 
-from fastNLP.io.config_io import ConfigSection, ConfigLoader, ConfigSaver
+from fastNLP.io import ConfigSection, ConfigLoader, ConfigSaver
 
 
 class TestConfigSaver(unittest.TestCase):
     def test_case_1(self):
-        config_file_dir = "test/io"
+        config_file_dir = "."
         config_file_name = "config"
         config_file_path = os.path.join(config_file_dir, config_file_name)
 
         tmp_config_file_path = os.path.join(config_file_dir, "tmp_config")
 
         with open(config_file_path, "r") as f:
             lines = f.readlines()
 
         standard_section = ConfigSection()
         t_section = ConfigSection()
         ConfigLoader().load_config(config_file_path, {"test": standard_section, "t": t_section})
 
         config_saver = ConfigSaver(config_file_path)
 
         section = ConfigSection()
         section["doubles"] = 0.8
         section["tt"] = 0.5
         section["test"] = 105
         section["str"] = "this is a str"
 
         test_case_2_section = section
         test_case_2_section["double"] = 0.5
 
         for k in section.__dict__.keys():
             standard_section[k] = section[k]
 
         config_saver.save_config_file("test", section)
         config_saver.save_config_file("another-test", section)
         config_saver.save_config_file("one-another-test", section)
         config_saver.save_config_file("test-case-2", section)
 
         test_section = ConfigSection()
         at_section = ConfigSection()
         another_test_section = ConfigSection()
         one_another_test_section = ConfigSection()
         a_test_case_2_section = ConfigSection()
 
         ConfigLoader().load_config(config_file_path, {"test": test_section,
                                                       "another-test": another_test_section,
                                                       "t": at_section,
                                                       "one-another-test": one_another_test_section,
                                                       "test-case-2": a_test_case_2_section})
 
         assert test_section == standard_section
         assert at_section == t_section
         assert another_test_section == section
         assert one_another_test_section == section
         assert a_test_case_2_section == test_case_2_section
 
         config_saver.save_config_file("test", section)
 
         with open(config_file_path, "w") as f:
             f.writelines(lines)
 
         with open(tmp_config_file_path, "w") as f:
             f.write('[test]\n')
             f.write('this is an fault example\n')
 
         tmp_config_saver = ConfigSaver(tmp_config_file_path)
         try:
             tmp_config_saver._read_section()
         except Exception as e:
             pass
 
         os.remove(tmp_config_file_path)
 
         try:
             tmp_config_saver = ConfigSaver("file-NOT-exist")
         except Exception as e:
             pass
 
     def test_case_2(self):
         config = "[section_A]\n[section_B]\n"
 
         with open("./test.cfg", "w", encoding="utf-8") as f:
             f.write(config)
         saver = ConfigSaver("./test.cfg")
 
         section = ConfigSection()
         section["doubles"] = 0.8
         section["tt"] = [1, 2, 3]
         section["test"] = 105
         section["str"] = "this is a str"
 
         saver.save_config_file("section_A", section)
 
         os.system("rm ./test.cfg")
 
     def test_case_3(self):
         config = "[section_A]\ndoubles = 0.9\ntt = [1, 2, 3]\n[section_B]\n"
 
         with open("./test.cfg", "w", encoding="utf-8") as f:
             f.write(config)
         saver = ConfigSaver("./test.cfg")
 
         section = ConfigSection()
         section["doubles"] = 0.8
         section["tt"] = [1, 2, 3]
         section["test"] = 105
         section["str"] = "this is a str"
 
         saver.save_config_file("section_A", section)
 
         os.system("rm ./test.cfg")
@@ -1,31 +1,30 @@
 import unittest
 
-from fastNLP.io.dataset_loader import Conll2003Loader, PeopleDailyCorpusLoader, \
-    CSVLoader, SNLILoader, JsonLoader
+from fastNLP.io import Conll2003Loader, PeopleDailyCorpusLoader, CSVLoader, SNLILoader, JsonLoader
 
 
-class TestDatasetLoader(unittest.TestCase):
+class TestDatasetLoader(unittest.TestCase):
     def test_Conll2003Loader(self):
         """
             Test the the loader of Conll2003 dataset
         """
-        dataset_path = "test/data_for_tests/conll_2003_example.txt"
+        dataset_path = "../data_for_tests/conll_2003_example.txt"
         loader = Conll2003Loader()
         dataset_2003 = loader.load(dataset_path)
 
     def test_PeopleDailyCorpusLoader(self):
-        data_set = PeopleDailyCorpusLoader().load("test/data_for_tests/people_daily_raw.txt")
+        data_set = PeopleDailyCorpusLoader().load("../data_for_tests/people_daily_raw.txt")
 
     def test_CSVLoader(self):
-        ds = CSVLoader(sep='\t', headers=['words', 'label'])\
-            .load('test/data_for_tests/tutorial_sample_dataset.csv')
+        ds = CSVLoader(sep='\t', headers=['words', 'label']) \
+            .load('../data_for_tests/tutorial_sample_dataset.csv')
         assert len(ds) > 0
 
     def test_SNLILoader(self):
-        ds = SNLILoader().load('test/data_for_tests/sample_snli.jsonl')
+        ds = SNLILoader().load('../data_for_tests/sample_snli.jsonl')
         assert len(ds) == 3
 
     def test_JsonLoader(self):
-        ds = JsonLoader().load('test/data_for_tests/sample_snli.jsonl')
+        ds = JsonLoader().load('../data_for_tests/sample_snli.jsonl')
         assert len(ds) == 3
@@ -1,15 +1,15 @@
 import unittest
 
 import numpy as np
 
-from fastNLP.core.vocabulary import Vocabulary
-from fastNLP.io.embed_loader import EmbedLoader
+from fastNLP import Vocabulary
+from fastNLP.io import EmbedLoader
 
 
 class TestEmbedLoader(unittest.TestCase):
     def test_load_with_vocab(self):
         vocab = Vocabulary()
-        glove = "test/data_for_tests/glove.6B.50d_test.txt"
-        word2vec = "test/data_for_tests/word2vec_test.txt"
+        glove = "../data_for_tests/glove.6B.50d_test.txt"
+        word2vec = "../data_for_tests/word2vec_test.txt"
         vocab.add_word('the')
         vocab.add_word('none')
         g_m = EmbedLoader.load_with_vocab(glove, vocab)
@@ -20,8 +20,8 @@ class TestEmbedLoader(unittest.TestCase):
     def test_load_without_vocab(self):
         words = ['the', 'of', 'in', 'a', 'to', 'and']
-        glove = "test/data_for_tests/glove.6B.50d_test.txt"
-        word2vec = "test/data_for_tests/word2vec_test.txt"
+        glove = "../data_for_tests/glove.6B.50d_test.txt"
+        word2vec = "../data_for_tests/word2vec_test.txt"
         g_m, vocab = EmbedLoader.load_without_vocab(glove)
         self.assertEqual(g_m.shape, (8, 50))
         for word in words: