@@ -1,7 +0,0 @@ | |||
fastNLP.io.config\_io module | |||
============================ | |||
.. automodule:: fastNLP.io.config_io | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: |
@@ -1,7 +0,0 @@ | |||
fastNLP.io.file\_reader module | |||
============================== | |||
.. automodule:: fastNLP.io.file_reader | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: |
@@ -12,9 +12,7 @@ Submodules | |||
.. toctree:: | |||
fastNLP.io.base_loader | |||
fastNLP.io.config_io | |||
fastNLP.io.dataset_loader | |||
fastNLP.io.embed_loader | |||
fastNLP.io.file_reader | |||
fastNLP.io.model_io | |||
@@ -9,7 +9,6 @@ fastNLP 依赖如下包:: | |||
torch>=0.4.0 | |||
numpy | |||
tensorboardX | |||
tqdm | |||
nltk | |||
@@ -18,4 +17,4 @@ fastNLP 依赖如下包:: | |||
.. code:: shell | |||
>>> pip install fitlog | |||
>>> pip install fastNLP |
@@ -5,16 +5,13 @@ | |||
2. 用于读入数据的 :doc:`DataSetLoader <fastNLP.io.dataset_loader>` 类 | |||
3. 用于读写config文件的类, 参考 :doc:`Config-IO <fastNLP.io.config_io>` | |||
4. 用于保存和载入模型的类, 参考 :doc:`Model-IO <fastNLP.io.model_io>` | |||
3. 用于保存和载入模型的类, 参考 :doc:`Model-IO <fastNLP.io.model_io>` | |||
这些类的使用方法可以在对应module的文档下查看. | |||
""" | |||
from .embed_loader import EmbedLoader | |||
from .dataset_loader import DataSetLoader, CSVLoader, JsonLoader, ConllLoader, SNLILoader, SSTLoader, \ | |||
PeopleDailyCorpusLoader, Conll2003Loader | |||
from .config_io import ConfigLoader, ConfigSection, ConfigSaver | |||
from .model_io import ModelLoader as ModelLoader, ModelSaver as ModelSaver | |||
__all__ = [ | |||
@@ -29,10 +26,6 @@ __all__ = [ | |||
'PeopleDailyCorpusLoader', | |||
'Conll2003Loader', | |||
'ConfigLoader', | |||
'ConfigSection', | |||
'ConfigSaver', | |||
'ModelLoader', | |||
'ModelSaver', | |||
] |
@@ -5,7 +5,6 @@ TODO 详细介绍的表格,与主页相对应 | |||
""" | |||
from .base_model import BaseModel | |||
from .biaffine_parser import BiaffineParser, GraphParser | |||
from .char_language_model import CharLM | |||
from .cnn_text_classification import CNNText | |||
from .sequence_modeling import SeqLabeling, AdvSeqLabel | |||
from .snli import ESIM |
@@ -1,138 +0,0 @@ | |||
import torch | |||
import torch.nn as nn | |||
import torch.nn.functional as F | |||
from ..modules.encoder.lstm import LSTM | |||
class Highway(nn.Module):
    """Highway layer.

    A sigmoid gate ``t`` computed from the input mixes a ReLU-transformed
    projection of the input with the untouched input:
    ``t * relu(fc2(x)) + (1 - t) * x``.

    :param int input_size: size of the last dimension of the input; the output
        has the same size.
    """

    def __init__(self, input_size):
        super(Highway, self).__init__()
        # fc1 produces the gate, fc2 produces the candidate transform.
        self.fc1 = nn.Linear(input_size, input_size, bias=True)
        self.fc2 = nn.Linear(input_size, input_size, bias=True)

    def forward(self, x):
        """
        :param torch.Tensor x: input whose last dimension is ``input_size``
        :return: tensor with the same shape as ``x``
        """
        # torch.sigmoid replaces the deprecated F.sigmoid.
        gate = torch.sigmoid(self.fc1(x))
        return torch.mul(gate, F.relu(self.fc2(x))) + torch.mul(1 - gate, x)
class CharLM(nn.Module):
    """Character-aware language model: char CNN + highway network + LSTM.

    Input::
        LongTensor of character ids with shape [num_seq, seq_len, max_word_len+2]

    Output::
        2D tensor with shape [num_seq*seq_len, vocab_size]

    :param int char_emb_dim: size of each character embedding
    :param int word_emb_dim: hidden size of the LSTM (size of each word representation)
    :param int vocab_size: number of unique words (width of the output layer)
    :param int num_char: number of unique characters
    """

    def __init__(self, char_emb_dim, word_emb_dim,
                 vocab_size, num_char):
        super(CharLM, self).__init__()
        self.char_emb_dim = char_emb_dim
        self.word_emb_dim = word_emb_dim
        self.vocab_size = vocab_size

        # char embedding layer
        self.char_embed = nn.Embedding(num_char, char_emb_dim)

        # list of tuples: (number of filters, filter width)
        self.filter_num_width = [(25, 1), (50, 2), (75, 3), (100, 4), (125, 5), (150, 6)]

        # Convolutions with different filter widths. They must live in an
        # nn.ModuleList (not a plain list) so that their parameters are
        # registered with the module: otherwise optimizers, .to(device) and
        # state_dict() silently ignore them.
        self.convolutions = nn.ModuleList([
            nn.Conv2d(
                1,  # in_channel
                out_channel,  # out_channel
                kernel_size=(char_emb_dim, filter_width),  # (height, width)
                bias=True
            )
            for out_channel, filter_width in self.filter_num_width
        ])

        self.highway_input_dim = sum(num for num, _ in self.filter_num_width)

        self.batch_norm = nn.BatchNorm1d(self.highway_input_dim, affine=False)

        # highway net
        self.highway1 = Highway(self.highway_input_dim)
        self.highway2 = Highway(self.highway_input_dim)

        # LSTM
        self.lstm_num_layers = 2
        self.lstm = LSTM(self.highway_input_dim, hidden_size=self.word_emb_dim, num_layers=self.lstm_num_layers,
                         dropout=0.5)

        # output layer
        self.dropout = nn.Dropout(p=0.5)
        self.linear = nn.Linear(self.word_emb_dim, self.vocab_size)

    def forward(self, x):
        """
        :param torch.LongTensor x: character ids, [num_seq, seq_len, max_word_len+2]
        :return: word scores, [num_seq*seq_len, vocab_size]
        """
        lstm_batch_size = x.size(0)
        lstm_seq_len = x.size(1)

        x = x.contiguous().view(-1, x.size(2))
        # [num_seq*seq_len, max_word_len+2]

        x = self.char_embed(x)
        # [num_seq*seq_len, max_word_len+2, char_emb_dim]

        x = torch.transpose(x.view(x.size(0), 1, x.size(1), -1), 2, 3)
        # [num_seq*seq_len, 1, char_emb_dim, max_word_len+2]

        x = self.conv_layers(x)
        # [num_seq*seq_len, total_num_filters]

        x = self.batch_norm(x)
        # [num_seq*seq_len, total_num_filters]

        x = self.highway1(x)
        x = self.highway2(x)
        # [num_seq*seq_len, total_num_filters]

        x = x.contiguous().view(lstm_batch_size, lstm_seq_len, -1)
        # [num_seq, seq_len, total_num_filters]

        x = self.lstm(x)
        # The LSTM wrapper may return either the output sequence alone or an
        # (output, hidden) pair; keep only the output sequence.
        if isinstance(x, tuple):
            x = x[0]
        # [num_seq, seq_len, hidden_size] (the wrapper defaults to batch_first)

        x = self.dropout(x)

        x = x.contiguous().view(lstm_batch_size * lstm_seq_len, -1)
        # [num_seq*seq_len, hidden_size]

        x = self.linear(x)
        # [num_seq*seq_len, vocab_size]
        return x

    def conv_layers(self, x):
        """Apply every convolution, max-pool over positions and concatenate.

        :param torch.Tensor x: [batch_size, 1, char_emb_dim, max_word_len+2]
        :return: [batch_size, total_num_filters]
        """
        chosen_list = []
        for conv in self.convolutions:
            feature_map = torch.tanh(conv(x))
            # [batch_size, out_channel, 1, max_word_len-width+1]
            chosen = torch.max(feature_map, 3)[0]
            # [batch_size, out_channel, 1]
            # Squeeze only the height axis: a bare squeeze() would also drop
            # the batch axis when batch_size == 1.
            chosen = chosen.squeeze(2)
            # [batch_size, out_channel]
            chosen_list.append(chosen)

        # [batch_size, total_num_filters]
        return torch.cat(chosen_list, 1)
@@ -12,19 +12,21 @@ my_inf = 10e12 | |||
class ESIM(BaseModel): | |||
"""ESIM模型的一个PyTorch实现。 | |||
""" | |||
ESIM模型的一个PyTorch实现。 | |||
ESIM模型的论文: Enhanced LSTM for Natural Language Inference (arXiv: 1609.06038) | |||
:param int vocab_size: 词表大小 | |||
:param int embed_dim: 词嵌入维度 | |||
:param int hidden_size: LSTM隐层大小 | |||
:param float dropout: dropout大小,默认为0 | |||
:param int num_classes: 标签数目,默认为3 | |||
:param numpy.array init_embedding: 初始词嵌入矩阵,形状为(vocab_size, embed_dim),默认为None,即随机初始化词嵌入矩阵 | |||
""" | |||
def __init__(self, vocab_size, embed_dim, hidden_size, dropout=0.0, num_classes=3, init_embedding=None): | |||
""" | |||
:param int vocab_size: 词表大小 | |||
:param int embed_dim: 词嵌入维度 | |||
:param int hidden_size: LSTM隐层大小 | |||
:param float dropout: dropout大小,默认为0 | |||
:param int num_classes: 标签数目,默认为3 | |||
:param numpy.array init_embedding: 初始词嵌入矩阵,形状为(vocab_size, embed_dim),默认为None,即随机初始化词嵌入矩阵 | |||
""" | |||
super(ESIM, self).__init__() | |||
self.vocab_size = vocab_size | |||
self.embed_dim = embed_dim | |||
@@ -12,8 +12,8 @@ from . import decoder | |||
from . import encoder | |||
from .aggregator import * | |||
from .decoder import * | |||
from .other_modules import * | |||
from .dropout import TimestepDropout | |||
from .encoder import * | |||
from .utils import get_embeddings | |||
__version__ = '0.0.0' |
@@ -1,11 +1,7 @@ | |||
__all__ = ["MaxPool", "MaxPoolWithMask", "AvgPool", "MeanPoolWithMask", "KMaxPool", "Attention", "BiAttention", | |||
"SelfAttention"] | |||
__all__ = ["MaxPool", "MaxPoolWithMask", "AvgPool", "MultiHeadAttention"] | |||
from .pooling import MaxPool | |||
from .pooling import MaxPoolWithMask | |||
from .pooling import AvgPool | |||
from .pooling import MeanPoolWithMask | |||
from .pooling import KMaxPool | |||
from .attention import Attention | |||
from .attention import BiAttention | |||
from .attention import SelfAttention | |||
from .attention import MultiHeadAttention |
@@ -1,3 +1,4 @@ | |||
__all__ =["MultiHeadAttention"] | |||
import math | |||
import torch | |||
@@ -5,27 +6,14 @@ import torch.nn.functional as F | |||
from torch import nn | |||
from ..dropout import TimestepDropout | |||
from ..utils import mask_softmax | |||
from ..utils import initial_parameter | |||
class Attention(torch.nn.Module): | |||
def __init__(self, normalize=False): | |||
super(Attention, self).__init__() | |||
self.normalize = normalize | |||
def forward(self, query, memory, mask): | |||
similarities = self._atten_forward(query, memory) | |||
if self.normalize: | |||
return mask_softmax(similarities, mask) | |||
return similarities | |||
def _atten_forward(self, query, memory): | |||
raise NotImplementedError | |||
class DotAttention(nn.Module): | |||
""" | |||
TODO | |||
""" | |||
def __init__(self, key_size, value_size, dropout=0.1): | |||
super(DotAttention, self).__init__() | |||
self.key_size = key_size | |||
@@ -51,15 +39,15 @@ class DotAttention(nn.Module): | |||
class MultiHeadAttention(nn.Module): | |||
def __init__(self, input_size, key_size, value_size, num_head, dropout=0.1): | |||
""" | |||
""" | |||
:param input_size: int, 输入维度的大小。同时也是输出维度的大小。 | |||
:param key_size: int, 每个head的维度大小。 | |||
:param value_size: int,每个head中value的维度。 | |||
:param num_head: int,head的数量。 | |||
:param dropout: float。 | |||
""" | |||
:param input_size: int, 输入维度的大小。同时也是输出维度的大小。 | |||
:param key_size: int, 每个head的维度大小。 | |||
:param value_size: int,每个head中value的维度。 | |||
:param num_head: int,head的数量。 | |||
:param dropout: float。 | |||
""" | |||
def __init__(self, input_size, key_size, value_size, num_head, dropout=0.1): | |||
super(MultiHeadAttention, self).__init__() | |||
self.input_size = input_size | |||
self.key_size = key_size | |||
@@ -112,16 +100,16 @@ class MultiHeadAttention(nn.Module): | |||
class BiAttention(nn.Module): | |||
"""Bi Attention module | |||
r"""Bi Attention module | |||
Calculate Bi Attention matrix `e` | |||
.. math:: | |||
\\begin{array}{ll} \\\\ | |||
e_ij = {a}^{\\mathbf{T}}_{i}{b}_{j} \\\\ | |||
\begin{array}{ll} \\ | |||
e_ij = {a}^{\mathbf{T}}_{i}{b}_{j} \\ | |||
a_i = | |||
b_j = | |||
\\end{array} | |||
\end{array} | |||
""" | |||
@@ -171,8 +159,11 @@ class BiAttention(nn.Module): | |||
return out_x1, out_x2 | |||
class SelfAttention(nn.Module): | |||
"""Self Attention Module. | |||
""" | |||
Self Attention Module. | |||
:param int input_size: 输入tensor的hidden维度 | |||
:param int attention_unit: 输出tensor的hidden维度 | |||
:param int attention_hops: | |||
@@ -1,21 +1,23 @@ | |||
__all__ = ["MaxPool", "MaxPoolWithMask", "AvgPool"] | |||
import torch | |||
import torch.nn as nn | |||
class MaxPool(nn.Module): | |||
"""Max-pooling模块。""" | |||
""" | |||
Max-pooling模块。 | |||
:param stride: 窗口移动大小,默认为kernel_size | |||
:param padding: padding的内容,默认为0 | |||
:param dilation: 控制窗口内元素移动距离的大小 | |||
:param dimension: MaxPool的维度,支持1,2,3维。 | |||
:param kernel_size: max pooling的窗口大小,默认为tensor最后k维,其中k为dimension | |||
:param return_indices: | |||
:param ceil_mode: | |||
""" | |||
def __init__(self, stride=None, padding=0, dilation=1, dimension=1, kernel_size=None, | |||
return_indices=False, ceil_mode=False): | |||
""" | |||
:param stride: 窗口移动大小,默认为kernel_size | |||
:param padding: padding的内容,默认为0 | |||
:param dilation: 控制窗口内元素移动距离的大小 | |||
:param dimension: MaxPool的维度,支持1,2,3维。 | |||
:param kernel_size: max pooling的窗口大小,默认为tensor最后k维,其中k为dimension | |||
:param return_indices: | |||
:param ceil_mode: | |||
""" | |||
super(MaxPool, self).__init__() | |||
assert (1 <= dimension) and (dimension <= 3) | |||
self.dimension = dimension | |||
@@ -110,6 +112,7 @@ class AvgPool(nn.Module): | |||
class MeanPoolWithMask(nn.Module): | |||
def __init__(self): | |||
super(MeanPoolWithMask, self).__init__() | |||
self.inf = 10e12 | |||
@@ -1,3 +1,4 @@ | |||
__all__ = ["MLP", "ConditionalRandomField"] | |||
__all__ = ["MLP", "ConditionalRandomField","viterbi_decode"] | |||
from .CRF import ConditionalRandomField | |||
from .MLP import MLP | |||
from .utils import viterbi_decode |
@@ -1,4 +1,4 @@ | |||
__all__ = ["viterbi_decode"] | |||
import torch | |||
@@ -1,5 +1,5 @@ | |||
import torch | |||
__all__ = [] | |||
class TimestepDropout(torch.nn.Dropout): | |||
"""This module accepts a ``[batch_size, num_timesteps, embedding_dim)]`` and use a single | |||
@@ -1,11 +1,9 @@ | |||
from .conv_maxpool import ConvMaxpool | |||
from .embedding import Embedding | |||
from .linear import Linear | |||
from .lstm import LSTM | |||
from .bert import BertModel | |||
__all__ = ["LSTM", | |||
"Embedding", | |||
"Linear", | |||
"ConvMaxpool", | |||
"BertModel"] |
@@ -6,16 +6,15 @@ from ..utils import initial_parameter | |||
# from torch.nn.init import xavier_uniform | |||
class ConvolutionCharEncoder(nn.Module): | |||
"""char级别的卷积编码器.""" | |||
""" | |||
char级别的卷积编码器. | |||
:param int char_emb_size: char级别embedding的维度. Default: 50 | |||
例: 有26个字符, 每一个的embedding是一个50维的向量, 所以输入的向量维度为50. | |||
:param tuple feature_maps: 一个由int组成的tuple. tuple的长度是char级别卷积操作的数目, 第`i`个int表示第`i`个卷积操作的filter. | |||
:param tuple kernels: 一个由int组成的tuple. tuple的长度是char级别卷积操作的数目, 第`i`个int表示第`i`个卷积操作的卷积核. | |||
:param initial_method: 初始化参数的方式, 默认为`xavier normal` | |||
""" | |||
def __init__(self, char_emb_size=50, feature_maps=(40, 30, 30), kernels=(3, 4, 5), initial_method=None): | |||
""" | |||
:param int char_emb_size: char级别embedding的维度. Default: 50 | |||
例: 有26个字符, 每一个的embedding是一个50维的向量, 所以输入的向量维度为50. | |||
:param tuple feature_maps: 一个由int组成的tuple. tuple的长度是char级别卷积操作的数目, 第`i`个int表示第`i`个卷积操作的filter. | |||
:param tuple kernels: 一个由int组成的tuple. tuple的长度是char级别卷积操作的数目, 第`i`个int表示第`i`个卷积操作的卷积核. | |||
:param initial_method: 初始化参数的方式, 默认为`xavier normal` | |||
""" | |||
super(ConvolutionCharEncoder, self).__init__() | |||
self.convs = nn.ModuleList([ | |||
nn.Conv2d(1, feature_maps[i], kernel_size=(char_emb_size, kernels[i]), bias=True, padding=(0, 4)) | |||
@@ -1,21 +0,0 @@ | |||
import torch.nn as nn | |||
from ..utils import initial_parameter | |||
class Linear(nn.Module):
    """Fully connected layer wrapping ``nn.Linear`` with configurable initialization.

    :param int input_size: size of each input sample
    :param int output_size: size of each output sample
    :param bool bias: passed through to ``nn.Linear`` as its bias flag
    :param str initial_method: forwarded to ``initial_parameter`` to choose
        how the weights are initialized
    """

    def __init__(self, input_size, output_size, bias=True, initial_method=None):
        super(Linear, self).__init__()
        projection = nn.Linear(input_size, output_size, bias)
        self.linear = projection
        initial_parameter(self, initial_method)

    def forward(self, x):
        """Return the affine projection of ``x``."""
        return self.linear(x)
@@ -19,15 +19,13 @@ class LSTM(nn.Module): | |||
:param batch_first: 若为 ``True``, 输入和输出 ``Tensor`` 形状为 | |||
:(batch, seq, feature). Default: ``False`` | |||
:param bias: 如果为 ``False``, 模型将不会使用bias. Default: ``True`` | |||
:param get_hidden: 是否返回隐状态 `h` . Default: ``False`` | |||
""" | |||
def __init__(self, input_size, hidden_size=100, num_layers=1, dropout=0.0, batch_first=True, | |||
bidirectional=False, bias=True, initial_method=None, get_hidden=False): | |||
bidirectional=False, bias=True, initial_method=None): | |||
super(LSTM, self).__init__() | |||
self.batch_first = batch_first | |||
self.lstm = nn.LSTM(input_size, hidden_size, num_layers, bias=bias, batch_first=batch_first, | |||
dropout=dropout, bidirectional=bidirectional) | |||
self.get_hidden = get_hidden | |||
initial_parameter(self, initial_method) | |||
def forward(self, x, seq_len=None, h0=None, c0=None): | |||
@@ -39,7 +37,6 @@ class LSTM(nn.Module): | |||
:param c0: [batch, hidden_size] 初始Cell状态, 若为 ``None`` , 设为全1向量. Default: ``None`` | |||
:return (output, ht) 或 output: 若 ``get_hidden=True`` [batch, seq_len, hidden_size*num_direction] 输出序列 | |||
和 [batch, hidden_size*num_direction] 最后时刻隐状态. | |||
若 ``get_hidden=False`` 仅返回输出序列. | |||
""" | |||
if h0 is not None and c0 is not None: | |||
hx = (h0, c0) | |||
@@ -61,16 +58,4 @@ class LSTM(nn.Module): | |||
output = output[:, unsort_idx] | |||
else: | |||
output, hx = self.lstm(x, hx) | |||
if self.get_hidden: | |||
return output, hx | |||
return output | |||
if __name__ == "__main__": | |||
lstm = LSTM(input_size=2, hidden_size=2, get_hidden=False) | |||
x = torch.randn((3, 5, 2)) | |||
seq_lens = torch.tensor([5,1,2]) | |||
y = lstm(x, seq_lens) | |||
print(x) | |||
print(y) | |||
print(x.size(), y.size(), ) | |||
return output, hx |
@@ -1,186 +0,0 @@ | |||
import numpy as np | |||
import torch | |||
import torch.nn as nn | |||
import torch.nn.functional as F | |||
import torch.utils.data | |||
from torch.nn import Parameter | |||
class GroupNorm(nn.Module):
    """Group normalization over a 3-D input ``[N, C, H]``.

    The channels are split into ``num_groups`` groups; each group is
    normalized to zero mean and unit variance, then a per-channel scale and
    shift are applied.

    :param int num_features: number of channels ``C``
    :param int num_groups: number of groups; must divide ``C``
    :param float eps: value added to the variance for numerical stability
    """

    def __init__(self, num_features, num_groups=20, eps=1e-5):
        super(GroupNorm, self).__init__()
        self.weight = nn.Parameter(torch.ones(1, num_features, 1))
        self.bias = nn.Parameter(torch.zeros(1, num_features, 1))
        self.num_groups = num_groups
        self.eps = eps

    def forward(self, x):
        """
        :param torch.Tensor x: input of shape ``[N, C, H]``
        :return: normalized tensor of the same shape
        """
        batch, channels, length = x.size()
        groups = self.num_groups
        assert channels % groups == 0

        grouped = x.view(batch, groups, -1)
        group_mean = grouped.mean(-1, keepdim=True)
        group_std = (grouped.var(-1, keepdim=True) + self.eps).sqrt()
        normalized = ((grouped - group_mean) / group_std).view(batch, channels, length)
        return normalized * self.weight + self.bias
class LayerNormalization(nn.Module):
    """Layer normalization over the last dimension with a learned scale and shift.

    :param int layer_size: size of the last dimension of the input
    :param float eps: value added to the standard deviation, default=1e-3
    """

    def __init__(self, layer_size, eps=1e-3):
        super(LayerNormalization, self).__init__()
        self.eps = eps
        self.a_2 = nn.Parameter(torch.ones(1, layer_size, requires_grad=True))
        self.b_2 = nn.Parameter(torch.zeros(1, layer_size, requires_grad=True))

    def forward(self, z):
        """
        :param torch.Tensor z: input tensor; returned unchanged when its
            second dimension has size 1
        :return: normalized tensor with the same shape as ``z``
        """
        if z.size(1) == 1:
            return z

        centered = z - z.mean(dim=-1, keepdim=True)
        spread = z.std(dim=-1, keepdim=True) + self.eps
        return centered / spread * self.a_2 + self.b_2
class BiLinear(nn.Module):
    """Bilinear layer with additional linear terms for each input.

    Computes ``left^T U right + W_l left + W_r right + bias``.
    """

    def __init__(self, n_left, n_right, n_out, bias=True):
        """
        :param int n_left: size of left input
        :param int n_right: size of right input
        :param int n_out: size of output
        :param bool bias: If set to False, the layer will not learn an additive bias. Default: True
        """
        super(BiLinear, self).__init__()
        self.n_left = n_left
        self.n_right = n_right
        self.n_out = n_out

        self.U = Parameter(torch.Tensor(self.n_out, self.n_left, self.n_right))
        self.W_l = Parameter(torch.Tensor(self.n_out, self.n_left))
        # W_r projects the right input, so its second dimension is n_right
        # (it was n_left, which broke forward() whenever n_left != n_right).
        self.W_r = Parameter(torch.Tensor(self.n_out, self.n_right))

        if bias:
            self.bias = Parameter(torch.Tensor(n_out))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        """Xavier-initialize the weights and zero the bias (when present)."""
        nn.init.xavier_uniform_(self.W_l)
        nn.init.xavier_uniform_(self.W_r)
        # bias is None when the layer was built with bias=False.
        if self.bias is not None:
            nn.init.constant_(self.bias, 0.)
        nn.init.xavier_uniform_(self.U)

    def forward(self, input_left, input_right):
        """
        :param Tensor input_left: the left input tensor with shape = [batch1, batch2, ..., left_features]
        :param Tensor input_right: the right input tensor with shape = [batch1, batch2, ..., right_features]
        :return: Tensor with shape = [batch1, batch2, ..., out_features]
        """
        left_size = input_left.size()
        right_size = input_right.size()
        assert left_size[:-1] == right_size[:-1], \
            "batch size of left and right inputs mis-match: (%s, %s)" % (left_size[:-1], right_size[:-1])
        batch = int(np.prod(left_size[:-1]))

        # convert left and right input to matrices [batch, left_features], [batch, right_features]
        input_left = input_left.view(batch, self.n_left)
        input_right = input_right.view(batch, self.n_right)

        # output [batch, out_features]
        output = F.bilinear(input_left, input_right, self.U, self.bias)
        output = output + \
                 F.linear(input_left, self.W_l, None) + \
                 F.linear(input_right, self.W_r, None)
        # convert back to [batch1, batch2, ..., out_features]
        return output.view(left_size[:-1] + (self.n_out,))

    def __repr__(self):
        return self.__class__.__name__ + ' (' \
               + 'in1_features=' + str(self.n_left) \
               + ', in2_features=' + str(self.n_right) \
               + ', out_features=' + str(self.n_out) + ')'
class BiAffine(nn.Module):
    """Bi-affine scorer between decoder and encoder positions.

    Produces, for every label, an energy for each (decoder position,
    encoder position) pair.
    """

    def __init__(self, n_enc, n_dec, n_labels, biaffine=True, **kwargs):
        """
        :param int n_enc: the dimension of the encoder input.
        :param int n_dec: the dimension of the decoder input.
        :param int n_labels: the number of labels of the crf layer
        :param bool biaffine: if apply bi-affine parameter.
        """
        super(BiAffine, self).__init__()
        self.n_enc = n_enc
        self.n_dec = n_dec
        self.num_labels = n_labels
        self.biaffine = biaffine

        self.W_d = Parameter(torch.Tensor(self.num_labels, self.n_dec))
        self.W_e = Parameter(torch.Tensor(self.num_labels, self.n_enc))
        self.b = Parameter(torch.Tensor(self.num_labels, 1, 1))
        if self.biaffine:
            self.U = Parameter(torch.Tensor(self.num_labels, self.n_dec, self.n_enc))
        else:
            self.register_parameter('U', None)

        self.reset_parameters()

    def reset_parameters(self):
        """Xavier-initialize the weights and zero the bias."""
        nn.init.xavier_uniform_(self.W_d)
        nn.init.xavier_uniform_(self.W_e)
        nn.init.constant_(self.b, 0.)
        if self.biaffine:
            nn.init.xavier_uniform_(self.U)

    def forward(self, input_d, input_e, mask_d=None, mask_e=None):
        """
        :param Tensor input_d: the decoder input tensor with shape = [batch, length_decoder, input_size]
        :param Tensor input_e: the child input tensor with shape = [batch, length_encoder, input_size]
        :param mask_d: Tensor or None, the mask tensor for decoder with shape = [batch, length_decoder]
        :param mask_e: Tensor or None, the mask tensor for encoder with shape = [batch, length_encoder]
        :returns: Tensor, the energy tensor with shape = [batch, num_label, length_decoder, length_encoder]
        """
        assert input_d.size(0) == input_e.size(0), 'batch sizes of encoder and decoder are requires to be equal.'

        # compute decoder part: [num_label, input_size_decoder] * [batch, input_size_decoder, length_decoder]
        # the output shape is [batch, num_label, length_decoder]; a trailing axis is added for broadcasting
        out_d = torch.matmul(self.W_d, input_d.transpose(1, 2)).unsqueeze(3)
        # compute encoder part: [num_label, input_size_encoder] * [batch, input_size_encoder, length_encoder]
        # the output shape is [batch, num_label, length_encoder]; an axis is inserted for broadcasting
        out_e = torch.matmul(self.W_e, input_e.transpose(1, 2)).unsqueeze(2)

        # output shape [batch, num_label, length_decoder, length_encoder]
        if self.biaffine:
            # compute bi-affine part
            # [batch, 1, length_decoder, input_size_decoder] * [num_labels, input_size_decoder, input_size_encoder]
            # output shape [batch, num_label, length_decoder, input_size_encoder]
            output = torch.matmul(input_d.unsqueeze(1), self.U)
            # [batch, num_label, length_decoder, input_size_encoder] * [batch, 1, input_size_encoder, length_encoder]
            # output shape [batch, num_label, length_decoder, length_encoder]
            output = torch.matmul(output, input_e.unsqueeze(1).transpose(2, 3))
            output = output + out_d + out_e + self.b
        else:
            # Sum the decoder AND encoder terms (the encoder term was
            # previously dropped in favour of a doubled decoder term).
            output = out_d + out_e + self.b

        # Apply each mask independently so that supplying only one of them
        # neither crashes nor gets ignored.
        if mask_d is not None:
            output = output * mask_d.unsqueeze(1).unsqueeze(3)
        if mask_e is not None:
            output = output * mask_e.unsqueeze(1).unsqueeze(2)

        return output
@@ -4,14 +4,6 @@ import torch.nn as nn | |||
import torch.nn.init as init | |||
def mask_softmax(matrix, mask):
    """Softmax over the last dimension of ``matrix``.

    :param torch.Tensor matrix: scores to normalize
    :param mask: must be ``None``; masked softmax is not implemented
    :return: tensor of the same shape as ``matrix``
    :raises NotImplementedError: if ``mask`` is not ``None``
    """
    if mask is not None:
        raise NotImplementedError
    return torch.nn.functional.softmax(matrix, dim=-1)
def initial_parameter(net, initial_method=None): | |||
"""A method used to initialize the weights of PyTorch models. | |||
@@ -77,7 +69,8 @@ def initial_parameter(net, initial_method=None): | |||
def seq_mask(seq_len, max_len): | |||
"""Create sequence mask. | |||
""" | |||
Create sequence mask. | |||
:param seq_len: list or torch.Tensor, the lengths of sequences in a batch. | |||
:param max_len: int, the maximum sequence length in a batch. | |||
@@ -92,7 +85,8 @@ def seq_mask(seq_len, max_len): | |||
def get_embeddings(init_embed): | |||
"""得到词嵌入 | |||
""" | |||
得到词嵌入 TODO | |||
:param init_embed: 单词词典, 可以是 tuple, 包括(num_embedings, embedding_dim), 即 | |||
embedding的大小和每个词的维度. 也可以传入 nn.Embedding 对象, | |||
@@ -1,5 +1,4 @@ | |||
numpy | |||
torch>=0.4.0 | |||
tensorboardX | |||
tqdm | |||
nltk |
@@ -1,112 +1,112 @@ | |||
import os | |||
import unittest | |||
from fastNLP.io.config_io import ConfigSection, ConfigLoader, ConfigSaver | |||
from fastNLP.io import ConfigSection, ConfigLoader, ConfigSaver | |||
class TestConfigSaver(unittest.TestCase): | |||
def test_case_1(self): | |||
config_file_dir = "test/io" | |||
config_file_dir = "." | |||
config_file_name = "config" | |||
config_file_path = os.path.join(config_file_dir, config_file_name) | |||
tmp_config_file_path = os.path.join(config_file_dir, "tmp_config") | |||
with open(config_file_path, "r") as f: | |||
lines = f.readlines() | |||
standard_section = ConfigSection() | |||
t_section = ConfigSection() | |||
ConfigLoader().load_config(config_file_path, {"test": standard_section, "t": t_section}) | |||
config_saver = ConfigSaver(config_file_path) | |||
section = ConfigSection() | |||
section["doubles"] = 0.8 | |||
section["tt"] = 0.5 | |||
section["test"] = 105 | |||
section["str"] = "this is a str" | |||
test_case_2_section = section | |||
test_case_2_section["double"] = 0.5 | |||
for k in section.__dict__.keys(): | |||
standard_section[k] = section[k] | |||
config_saver.save_config_file("test", section) | |||
config_saver.save_config_file("another-test", section) | |||
config_saver.save_config_file("one-another-test", section) | |||
config_saver.save_config_file("test-case-2", section) | |||
test_section = ConfigSection() | |||
at_section = ConfigSection() | |||
another_test_section = ConfigSection() | |||
one_another_test_section = ConfigSection() | |||
a_test_case_2_section = ConfigSection() | |||
ConfigLoader().load_config(config_file_path, {"test": test_section, | |||
"another-test": another_test_section, | |||
"t": at_section, | |||
"one-another-test": one_another_test_section, | |||
"test-case-2": a_test_case_2_section}) | |||
assert test_section == standard_section | |||
assert at_section == t_section | |||
assert another_test_section == section | |||
assert one_another_test_section == section | |||
assert a_test_case_2_section == test_case_2_section | |||
config_saver.save_config_file("test", section) | |||
with open(config_file_path, "w") as f: | |||
f.writelines(lines) | |||
with open(tmp_config_file_path, "w") as f: | |||
f.write('[test]\n') | |||
f.write('this is an fault example\n') | |||
tmp_config_saver = ConfigSaver(tmp_config_file_path) | |||
try: | |||
tmp_config_saver._read_section() | |||
except Exception as e: | |||
pass | |||
os.remove(tmp_config_file_path) | |||
try: | |||
tmp_config_saver = ConfigSaver("file-NOT-exist") | |||
except Exception as e: | |||
pass | |||
def test_case_2(self): | |||
config = "[section_A]\n[section_B]\n" | |||
with open("./test.cfg", "w", encoding="utf-8") as f: | |||
f.write(config) | |||
saver = ConfigSaver("./test.cfg") | |||
section = ConfigSection() | |||
section["doubles"] = 0.8 | |||
section["tt"] = [1, 2, 3] | |||
section["test"] = 105 | |||
section["str"] = "this is a str" | |||
saver.save_config_file("section_A", section) | |||
os.system("rm ./test.cfg") | |||
def test_case_3(self): | |||
config = "[section_A]\ndoubles = 0.9\ntt = [1, 2, 3]\n[section_B]\n" | |||
with open("./test.cfg", "w", encoding="utf-8") as f: | |||
f.write(config) | |||
saver = ConfigSaver("./test.cfg") | |||
section = ConfigSection() | |||
section["doubles"] = 0.8 | |||
section["tt"] = [1, 2, 3] | |||
section["test"] = 105 | |||
section["str"] = "this is a str" | |||
saver.save_config_file("section_A", section) | |||
os.system("rm ./test.cfg") |
@@ -1,31 +1,30 @@ | |||
import unittest | |||
from fastNLP.io.dataset_loader import Conll2003Loader, PeopleDailyCorpusLoader, \ | |||
CSVLoader, SNLILoader, JsonLoader | |||
from fastNLP.io import Conll2003Loader, PeopleDailyCorpusLoader, CSVLoader, SNLILoader, JsonLoader | |||
class TestDatasetLoader(unittest.TestCase): | |||
class TestDatasetLoader(unittest.TestCase): | |||
def test_Conll2003Loader(self): | |||
""" | |||
Test the loader of Conll2003 dataset | |||
""" | |||
dataset_path = "test/data_for_tests/conll_2003_example.txt" | |||
dataset_path = "../data_for_tests/conll_2003_example.txt" | |||
loader = Conll2003Loader() | |||
dataset_2003 = loader.load(dataset_path) | |||
def test_PeopleDailyCorpusLoader(self): | |||
data_set = PeopleDailyCorpusLoader().load("test/data_for_tests/people_daily_raw.txt") | |||
data_set = PeopleDailyCorpusLoader().load("../data_for_tests/people_daily_raw.txt") | |||
def test_CSVLoader(self): | |||
ds = CSVLoader(sep='\t', headers=['words', 'label'])\ | |||
.load('test/data_for_tests/tutorial_sample_dataset.csv') | |||
ds = CSVLoader(sep='\t', headers=['words', 'label']) \ | |||
.load('../data_for_tests/tutorial_sample_dataset.csv') | |||
assert len(ds) > 0 | |||
def test_SNLILoader(self): | |||
ds = SNLILoader().load('test/data_for_tests/sample_snli.jsonl') | |||
ds = SNLILoader().load('../data_for_tests/sample_snli.jsonl') | |||
assert len(ds) == 3 | |||
def test_JsonLoader(self): | |||
ds = JsonLoader().load('test/data_for_tests/sample_snli.jsonl') | |||
ds = JsonLoader().load('../data_for_tests/sample_snli.jsonl') | |||
assert len(ds) == 3 | |||
@@ -1,15 +1,15 @@ | |||
import unittest | |||
import numpy as np | |||
from fastNLP.core.vocabulary import Vocabulary | |||
from fastNLP.io.embed_loader import EmbedLoader | |||
from fastNLP import Vocabulary | |||
from fastNLP.io import EmbedLoader | |||
class TestEmbedLoader(unittest.TestCase): | |||
def test_load_with_vocab(self): | |||
vocab = Vocabulary() | |||
glove = "test/data_for_tests/glove.6B.50d_test.txt" | |||
word2vec = "test/data_for_tests/word2vec_test.txt" | |||
glove = "../data_for_tests/glove.6B.50d_test.txt" | |||
word2vec = "../data_for_tests/word2vec_test.txt" | |||
vocab.add_word('the') | |||
vocab.add_word('none') | |||
g_m = EmbedLoader.load_with_vocab(glove, vocab) | |||
@@ -20,8 +20,8 @@ class TestEmbedLoader(unittest.TestCase): | |||
def test_load_without_vocab(self): | |||
words = ['the', 'of', 'in', 'a', 'to', 'and'] | |||
glove = "test/data_for_tests/glove.6B.50d_test.txt" | |||
word2vec = "test/data_for_tests/word2vec_test.txt" | |||
glove = "../data_for_tests/glove.6B.50d_test.txt" | |||
word2vec = "../data_for_tests/word2vec_test.txt" | |||
g_m, vocab = EmbedLoader.load_without_vocab(glove) | |||
self.assertEqual(g_m.shape, (8, 50)) | |||
for word in words: | |||