@@ -231,22 +231,29 @@ class Vocabulary(object):
vocab.from_dataset(train_data1, train_data2, field_name='words')
:param DataSet datasets: the DataSet(s) to index; one or more are supported.
:param str field_name: the field used to build the vocabulary.
If multiple DataSets are given, each of them must contain this field.
Currently only ``str`` , ``list(str)`` and ``list(list(str))`` are supported.
:param field_name: a ``str`` or a ``list(str)`` .
The field(s) used to build the vocabulary; one or more fields are supported.
If multiple DataSets are given, each of them must contain all of these fields.
Currently supported field structures: ``str`` , ``list(str)`` , ``list(list(str))`` .
:return self:
"""
if isinstance(field_name, str):
    field_name = [field_name]
elif not isinstance(field_name, list):
    raise TypeError('invalid argument field_name: {}'.format(field_name))
def construct_vocab(ins):
field = ins[field_name]
if isinstance(field, str):
self.add_word(field)
elif isinstance(field, list):
if not isinstance(field[0], list):
self.add_word_lst(field)
else:
if isinstance(field[0][0], list):
raise RuntimeError("Only support field with 2 dimensions.")
[self.add_word_lst(w) for w in field]
for fn in field_name:
field = ins[fn]
if isinstance(field, str):
self.add_word(field)
elif isinstance(field, list):
if not isinstance(field[0], list):
self.add_word_lst(field)
else:
if isinstance(field[0][0], list):
raise RuntimeError("Only support field with 2 dimensions.")
[self.add_word_lst(w) for w in field]
for idx, dataset in enumerate(datasets):
if isinstance(dataset, DataSet):
try:
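For reference, a minimal sketch of the multi-field call enabled by this change (the DataSets and the 'chars' field are hypothetical)::

    vocab = Vocabulary()
    # a single field still works as before
    vocab.from_dataset(train_data, field_name='words')
    # the new list form builds the vocabulary from several fields at once
    vocab.from_dataset(train_data, dev_data, field_name=['words', 'chars'])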
@@ -1 +1,37 @@
"""
Modules for IO, including:
1. the :ref:`EmbedLoader <embed-loader>` class for loading embeddings,
2. the :ref:`DataSetLoader <dataset-loader>` classes for loading data,
3. classes for reading and writing config files, see :ref:`Config-io <config-io>`,
4. classes for saving and loading models, see :ref:`Model-io <model-io>`.
How to use these classes is documented in the corresponding modules.
"""
from .embed_loader import EmbedLoader
from .dataset_loader import *
from .config_io import *
from .model_io import *
__all__ = [
'EmbedLoader',
'DataSetLoader',
'CSVLoader',
'JsonLoader',
'ConllLoader',
'SNLILoader',
'SSTLoader',
'PeopleDailyCorpusLoader',
'Conll2003Loader',
'ConfigLoader',
'ConfigSection',
'ConfigSaver',
'ModelLoader',
'ModelSaver',
]
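With these re-exports, the loaders above can be imported from fastNLP.io directly; a small usage sketch (the path is a placeholder and the default CSVLoader arguments are assumed)::

    from fastNLP.io import CSVLoader

    loader = CSVLoader()
    train_ds = loader.load('path/to/train.csv')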
@@ -1,3 +1,8 @@
"""
.. _config-io:
Utilities for reading, processing and saving config files.
"""
import configparser
import json
import os
@@ -1,3 +1,18 @@
"""
.. _dataset-loader:
The DataSetLoader APIs read data in different formats and return a `DataSet` .
The resulting `DataSet` object can be passed directly to `Trainer` and `Tester` for model training and testing.
Example::

    loader = SNLILoader()
    train_ds = loader.load('path/to/train')
    dev_ds = loader.load('path/to/dev')
    test_ds = loader.load('path/to/test')
    # ... do stuff
"""
import os
import json
from nltk.tree import Tree
@@ -55,8 +70,9 @@ def _uncompress(src, dst):
class DataSetLoader:
"""Interface that all `DataSetLoader` classes implement
"""
Interface that all `DataSetLoader` classes implement
"""
def load(self, path):
@@ -1,3 +1,8 @@
"""
.. _embed-loader:
Utilities for loading pre-trained embeddings; the result can be loaded directly as model parameters.
"""
import os
import numpy as np
@@ -1,3 +1,8 @@
"""
.. _model-io:
Utilities for loading and saving models.
"""
import torch
from fastNLP.io.base_loader import BaseLoader
@@ -1,3 +1,5 @@
"""A PyTorch implementation of the Biaffine Dependency Parser.
"""
from collections import defaultdict
import numpy as np
@@ -14,7 +16,7 @@ from fastNLP.modules.encoder.transformer import TransformerEncoder
from fastNLP.modules.encoder.variational_rnn import VarLSTM
from fastNLP.modules.utils import initial_parameter
from fastNLP.modules.utils import seq_mask
from fastNLP.modules.utils import get_embeddings
def _mst(scores):
"""
@@ -228,8 +230,9 @@ class BiaffineParser(GraphParser):
Reference: ` Deep Biaffine Attention for Neural Dependency Parsing (Dozat and Manning, 2016)
<https://arxiv.org/abs/1611.01734>`_ .
:param word_vocab_size: size of the word vocabulary
:param word_emb_dim: dimension of the word embedding vectors
:param init_embed: the word embedding specification. It can be a tuple of (num_embeddings, embedding_dim), i.e.
the vocabulary size and the dimension of each word vector; an nn.Embedding object may also be passed,
in which case the given object is used as the embedding directly.
:param pos_vocab_size: size of the part-of-speech vocabulary
:param pos_emb_dim: dimension of the part-of-speech vectors
:param num_label: number of edge label classes
@@ -243,8 +246,7 @@ class BiaffineParser(GraphParser):
If ``False`` , the more accurate but slower MST algorithm is used. Default: ``False``
"""
def __init__(self,
word_vocab_size,
word_emb_dim,
init_embed,
pos_vocab_size,
pos_emb_dim,
num_label,
@@ -258,7 +260,8 @@ class BiaffineParser(GraphParser):
super(BiaffineParser, self).__init__()
rnn_out_size = 2 * rnn_hidden_size
word_hid_dim = pos_hid_dim = rnn_hidden_size
self.word_embedding = nn.Embedding(num_embeddings=word_vocab_size, embedding_dim=word_emb_dim)
self.word_embedding = get_embeddings(init_embed)
word_emb_dim = self.word_embedding.embedding_dim
self.pos_embedding = nn.Embedding(num_embeddings=pos_vocab_size, embedding_dim=pos_emb_dim)
self.word_fc = nn.Linear(word_emb_dim, word_hid_dim)
self.pos_fc = nn.Linear(pos_emb_dim, pos_hid_dim)
@@ -14,8 +14,7 @@ class CNNText(torch.nn.Module):
'Yoon Kim. 2014. Convolutional Neural Networks for Sentence Classification.'
"""
def __init__(self, vocab_size,
embed_dim,
def __init__(self, init_embed,
num_classes,
kernel_nums=(3, 4, 5),
kernel_sizes=(3, 4, 5),
@@ -23,8 +22,8 @@ class CNNText(torch.nn.Module):
dropout=0.5):
"""
:param int vocab_size: size of the vocabulary
:param int embed_dim: dimension of the word embeddings
:param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray init_embed: the embedding specification (for a tuple(int, int),
the first int is vocab_size and the second is embed_dim); if a Tensor, Embedding or ndarray is given, it is used directly to initialize the Embedding
:param int num_classes: total number of classes
:param int,tuple(int) out_channels: number of output channels. If a list is given, its length must match that of kernel_sizes
:param int,tuple(int) kernel_sizes: kernel sizes of the output channels.
@@ -34,9 +33,9 @@ class CNNText(torch.nn.Module):
super(CNNText, self).__init__()
# no support for pre-trained embedding currently
self.embed = encoder.Embedding(vocab_size, embed_dim)
self.embed = encoder.Embedding(init_embed)
self.conv_pool = encoder.ConvMaxpool(
in_channels=embed_dim,
in_channels=self.embed.embedding_dim,
out_channels=kernel_nums,
kernel_sizes=kernel_sizes,
padding=padding)
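Under the new signature the first argument is the embedding specification rather than separate vocab_size / embed_dim values; a sketch with placeholder sizes::

    from fastNLP.models import CNNText

    # tuple form: (vocab_size, embed_dim) with randomly initialized weights
    model = CNNText((3000, 50), num_classes=5, padding=2, dropout=0.1)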
@@ -11,19 +11,19 @@ class SeqLabeling(BaseModel):
A basic sequence labeling model
"""
def __init__(self, vocab_size, embed_dim, hidden_size, num_classes):
def __init__(self, init_embed, hidden_size, num_classes):
"""
Base class for sequence labeling. The architecture is one Embedding layer, one LSTM (unidirectional, single layer), one FC layer, and one CRF layer.
:param int vocab_size: size of the vocabulary.
:param int embed_dim: dimension of the embedding
:param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray init_embed: the embedding specification (for a tuple(int, int),
the first int is vocab_size and the second is embed_dim); if a Tensor, Embedding or ndarray is given, it is used directly to initialize the Embedding
:param int hidden_size: size of the LSTM hidden layer
:param int num_classes: total number of classes
"""
super(SeqLabeling, self).__init__()
self.Embedding = encoder.embedding.Embedding(vocab_size, embed_dim)
self.Rnn = encoder.lstm.LSTM(embed_dim, hidden_size)
self.Embedding = encoder.embedding.Embedding(init_embed)
self.Rnn = encoder.lstm.LSTM(self.Embedding.embedding_dim, hidden_size)
self.Linear = encoder.linear.Linear(hidden_size, num_classes)
self.Crf = decoder.CRF.ConditionalRandomField(num_classes)
self.mask = None
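The same init_embed convention applies here; a sketch with placeholder sizes::

    # a tuple builds a fresh (vocab_size, embed_dim) embedding;
    # an nn.Embedding or a pretrained weight matrix can be passed instead
    model = SeqLabeling(init_embed=(3000, 100), hidden_size=200, num_classes=10)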
@@ -103,24 +103,22 @@ class AdvSeqLabel:
A more sophisticated sequence labelling model. The architecture is Embedding, LayerNorm, bidirectional LSTM (two layers), FC, LayerNorm, Dropout, FC, CRF.
"""
def __init__(self, vocab_size, embed_dim, hidden_size, num_classes, dropout=0.3, embedding=None,
id2words=None, encoding_type='bmes'):
def __init__(self, init_embed, hidden_size, num_classes, dropout=0.3, id2words=None, encoding_type='bmes'):
"""
:param int vocab_size: size of the vocabulary
:param int embed_dim: dimension of the embedding
:param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray init_embed: the embedding specification (for a tuple(int, int),
the first int is vocab_size and the second is embed_dim); if a Tensor, Embedding or ndarray is given, it is used directly to initialize the Embedding
:param int hidden_size: size of the LSTM hidden layer
:param int num_classes: number of classes
:param float dropout: dropout probability used inside the LSTM and in the Dropout layer
:param numpy.ndarray embedding: pre-trained embedding; its size must match the specified vocabulary size
:param dict id2words: mapping from tag id to tag word, used during CRF decoding to rule out illegal transitions; e.g. under the 'BMES' scheme, 'S'
must not follow 'B'. Tags such as 'B-NN', where the part before '-' indicates the tag type, are also supported. In that case it is guaranteed not only
that 'B-NN' is not followed by 'S-NN', but also that 'B-NN' is not followed by any 'M-xx' other than 'M-NN' (or 'E-NN').
:param str encoding_type: one of "BIO", "BMES", "BEMSO".
"""
self.Embedding = encoder.embedding.Embedding(vocab_size, embed_dim, init_emb=embedding)
self.norm1 = torch.nn.LayerNorm(embed_dim)
self.Rnn = torch.nn.LSTM(input_size=embed_dim, hidden_size=hidden_size, num_layers=2, dropout=dropout,
self.Embedding = encoder.embedding.Embedding(init_embed)
self.norm1 = torch.nn.LayerNorm(self.Embedding.embedding_dim)
self.Rnn = torch.nn.LSTM(input_size=self.Embedding.embedding_dim, hidden_size=hidden_size, num_layers=2, dropout=dropout,
bidirectional=True, batch_first=True)
self.Linear1 = encoder.Linear(hidden_size * 2, hidden_size * 2 // 3)
self.norm2 = torch.nn.LayerNorm(hidden_size * 2 // 3)
@@ -35,8 +35,7 @@ class ESIM(BaseModel):
self.drop = nn.Dropout(self.dropout)
self.embedding = Encoder.Embedding(
self.vocab_size, self.embed_dim, dropout=self.dropout,
init_emb=init_embedding,
(self.vocab_size, self.embed_dim), dropout=self.dropout,
)
self.embedding_layer = Encoder.Linear(self.embed_dim, self.hidden_size)
@@ -1,5 +1,8 @@
"""A PyTorch implementation of the Star-Transformer.
"""
from fastNLP.modules.encoder.star_transformer import StarTransformer
from fastNLP.core.utils import seq_lens_to_masks
from ..modules.utils import get_embeddings
import torch
from torch import nn
@@ -10,8 +13,9 @@ class StarTransEnc(nn.Module):
"""
Star-Transformer encoder with a word embedding layer
:param vocab_size: vocabulary size of the word embedding
:param emb_dim: feature dimension of each word embedding
:param init_embed: the word embedding specification. It can be a tuple of (num_embeddings, embedding_dim), i.e.
the vocabulary size and the dimension of each word vector; an nn.Embedding object may also be passed,
in which case the given object is used as the embedding directly.
:param num_cls: number of output classes
:param hidden_size: feature dimension inside the model.
:param num_layers: number of model layers.
@@ -22,7 +26,7 @@ class StarTransEnc(nn.Module):
:param emb_dropout: dropout probability of the word embedding.
:param dropout: dropout probability of the model outside the word embedding.
"""
def __init__(self, vocab_size, emb_dim,
def __init__(self, init_embed,
hidden_size,
num_layers,
num_head,
@@ -31,9 +35,10 @@ class StarTransEnc(nn.Module):
emb_dropout,
dropout):
super(StarTransEnc, self).__init__()
self.embedding = get_embeddings(init_embed)
emb_dim = self.embedding.embedding_dim
self.emb_fc = nn.Linear(emb_dim, hidden_size)
self.emb_drop = nn.Dropout(emb_dropout)
self.embedding = nn.Embedding(vocab_size, emb_dim)
self.encoder = StarTransformer(hidden_size=hidden_size,
num_layers=num_layers,
num_head=num_head,
@@ -1,20 +1,30 @@
import torch.nn as nn
from fastNLP.modules.utils import get_embeddings
class Embedding(nn.Embedding):
"""Embedding component. The vocabulary size is available as self.num_embeddings and the embedding dimension as self.embedding_dim"""
class Embedding(nn.Module):
"""Embedding component."""
def __init__(self, vocab_size, embed_dim, padding_idx=0, sparse=False, init_emb=None, dropout=0.0):
def __init__(self, init_embed, padding_idx=None, dropout=0.0, sparse=False, max_norm=None, norm_type=2,
scale_grad_by_freq=False):
"""
:param int vocab_size: size of the vocabulary.
:param int embed_dim: dimension of the embedding.
:param int padding_idx: entries equal to padding_idx are automatically zeroed.
:param bool sparse: if `True`, the weight matrix is a sparse matrix.
:param torch.Tensor init_emb: initial embedding matrix.
:param float dropout: dropout probability.
:param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray init_embed: the embedding specification (for a tuple(int, int),
the first int is vocab_size and the second is embed_dim); if a Tensor, Embedding or ndarray is given, it is used directly to initialize the Embedding
:param None,int padding_idx: the embedding at this index is always 0.
:param float dropout: dropout applied to the output of the Embedding.
:param bool sparse: if True, the gradient of the Embedding will be sparse; see the PyTorch Embedding docs for details.
:param None,float max_norm: maximum norm allowed for each vector
:param int norm_type: type of the norm
:param bool scale_grad_by_freq: if True, gradients are divided by the frequency of the word.
"""
super(Embedding, self).__init__()
self.embed = nn.Embedding(vocab_size, embed_dim, padding_idx, sparse=sparse, _weight=init_emb)
embed = get_embeddings(init_embed)
num_embeddings, embedding_dim = embed.weight.size()
super().__init__(num_embeddings, embedding_dim, padding_idx=padding_idx,
max_norm=max_norm, norm_type=norm_type, scale_grad_by_freq=scale_grad_by_freq,
sparse=sparse, _weight=embed.weight.data)
del embed
self.dropout = nn.Dropout(dropout)
def forward(self, x):
@@ -22,5 +32,5 @@ class Embedding(nn.Module):
:param torch.LongTensor x: [batch, seq_len]
:return: torch.Tensor : [batch, seq_len, embed_dim]
"""
x = self.embed(x)
x = super().forward(x)
return self.dropout(x)
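A short sketch of the accepted init_embed forms for the reworked Embedding (shapes are illustrative)::

    import numpy as np

    embed_random = Embedding((1000, 50))                    # size tuple, randomly initialized weights
    embed_pretrained = Embedding(np.random.rand(1000, 50))  # pretrained weight matrix
    embed_padded = Embedding((1000, 50), padding_idx=0, dropout=0.1)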
@@ -1,3 +1,6 @@
"""A lightweight wrapper around the PyTorch LSTM module.
Sequence lengths can be passed to forward, and padding is then handled automatically.
"""
import torch
import torch.nn as nn
import torch.nn.utils.rnn as rnn
@@ -35,8 +38,8 @@ class LSTM(nn.Module):
:param h0: [batch, hidden_size] initial hidden state; if ``None`` , an all-ones vector is used. Default: ``None``
:param c0: [batch, hidden_size] initial cell state; if ``None`` , an all-ones vector is used. Default: ``None``
:return (output, ht) or output: if ``get_hidden=True`` , the [batch, seq_len, hidden_size*num_direction] output sequence
:and the [batch, hidden_size*num_direction] last-step hidden state.
:if ``get_hidden=False`` , only the output sequence is returned.
and the [batch, hidden_size*num_direction] last-step hidden state.
if ``get_hidden=False`` , only the output sequence is returned.
"""
if h0 is not None and c0 is not None:
hx = (h0, c0)
@@ -1,3 +1,5 @@
"""A PyTorch implementation of the encoder part of the Star-Transformer
"""
import torch
from torch import nn
from torch.nn import functional as F
@@ -1,3 +1,5 @@
"""A PyTorch implementation of the Variational RNN
"""
import torch
import torch.nn as nn
from torch.nn.utils.rnn import PackedSequence, pack_padded_sequence, pad_packed_sequence
@@ -28,11 +30,11 @@ class VarRnnCellWrapper(nn.Module):
"""
:param PackedSequence input_x: [seq_len, batch_size, input_size]
:param hidden: for LSTM, tuple of (h_0, c_0), [batch_size, hidden_size]
:for other RNN, h_0, [batch_size, hidden_size]
for other RNN, h_0, [batch_size, hidden_size]
:param mask_x: [batch_size, input_size] dropout mask for input
:param mask_h: [batch_size, hidden_size] dropout mask for hidden
:return PackedSequence output: [seq_len, batch_size, hidden_size]
:hidden: for LSTM, tuple of (h_n, c_n), [batch_size, hidden_size]
hidden: for LSTM, tuple of (h_n, c_n), [batch_size, hidden_size]
for other RNN, h_n, [batch_size, hidden_size]
"""
def get_hi(hi, h0, size):
@@ -95,7 +97,7 @@ class VarRNNBase(nn.Module):
:param num_layers: number of RNN layers. Default: 1
:param bias: if ``False``, the model will not use a bias. Default: ``True``
:param batch_first: if ``True``, input and output ``Tensor`` s have shape
:(batch, seq, feature). Default: ``False``
(batch, seq, feature). Default: ``False``
:param input_dropout: dropout probability on the input. Default: 0
:param hidden_dropout: dropout probability on each hidden state. Default: 0
:param bidirectional: if ``True``, a bidirectional RNN is used. Default: ``False``
@@ -138,7 +140,7 @@ class VarRNNBase(nn.Module):
:param x: [batch, seq_len, input_size] input sequence
:param hx: [batch, hidden_size] initial hidden state; if ``None`` , an all-ones vector is used. Default: ``None``
:return (output, ht): [batch, seq_len, hidden_size*num_direction] output sequence
:and the [batch, hidden_size*num_direction] last-step hidden state
and the [batch, hidden_size*num_direction] last-step hidden state
"""
is_lstm = self.is_lstm
is_packed = isinstance(x, PackedSequence)
@@ -193,7 +195,6 @@ class VarRNNBase(nn.Module):
return output, hidden
class VarLSTM(VarRNNBase):
"""Variational Dropout LSTM.
@@ -202,7 +203,7 @@ class VarLSTM(VarRNNBase):
:param num_layers: number of RNN layers. Default: 1
:param bias: if ``False``, the model will not use a bias. Default: ``True``
:param batch_first: if ``True``, input and output ``Tensor`` s have shape
:(batch, seq, feature). Default: ``False``
(batch, seq, feature). Default: ``False``
:param input_dropout: dropout probability on the input. Default: 0
:param hidden_dropout: dropout probability on each hidden state. Default: 0
:param bidirectional: if ``True``, a bidirectional LSTM is used. Default: ``False``
@@ -211,6 +212,9 @@ class VarLSTM(VarRNNBase):
def __init__(self, *args, **kwargs):
super(VarLSTM, self).__init__(mode="LSTM", Cell=nn.LSTMCell, *args, **kwargs)
def forward(self, x, hx=None):
return super(VarLSTM, self).forward(x, hx)
class VarRNN(VarRNNBase):
"""Variational Dropout RNN.
@@ -220,7 +224,7 @@ class VarRNN(VarRNNBase):
:param num_layers: number of RNN layers. Default: 1
:param bias: if ``False``, the model will not use a bias. Default: ``True``
:param batch_first: if ``True``, input and output ``Tensor`` s have shape
:(batch, seq, feature). Default: ``False``
(batch, seq, feature). Default: ``False``
:param input_dropout: dropout probability on the input. Default: 0
:param hidden_dropout: dropout probability on each hidden state. Default: 0
:param bidirectional: if ``True``, a bidirectional RNN is used. Default: ``False``
@@ -229,6 +233,8 @@ class VarRNN(VarRNNBase):
def __init__(self, *args, **kwargs):
super(VarRNN, self).__init__(mode="RNN", Cell=nn.RNNCell, *args, **kwargs)
def forward(self, x, hx=None):
return super(VarRNN, self).forward(x, hx)
class VarGRU(VarRNNBase):
"""Variational Dropout GRU.
@@ -238,7 +244,7 @@ class VarGRU(VarRNNBase):
:param num_layers: number of RNN layers. Default: 1
:param bias: if ``False``, the model will not use a bias. Default: ``True``
:param batch_first: if ``True``, input and output ``Tensor`` s have shape
:(batch, seq, feature). Default: ``False``
(batch, seq, feature). Default: ``False``
:param input_dropout: dropout probability on the input. Default: 0
:param hidden_dropout: dropout probability on each hidden state. Default: 0
:param bidirectional: if ``True``, a bidirectional GRU is used. Default: ``False``
@@ -247,6 +253,9 @@ class VarGRU(VarRNNBase):
def __init__(self, *args, **kwargs):
super(VarGRU, self).__init__(mode="GRU", Cell=nn.GRUCell, *args, **kwargs)
def forward(self, x, hx=None):
return super(VarGRU, self).forward(x, hx)
# if __name__ == '__main__':
#     x = torch.Tensor([[1,2,3], [4,5,0], [6,0,0]])[:,:,None] * 0.1
#     mask = (x != 0).float().view(3, -1)
@@ -1,3 +1,4 @@
import numpy as np
import torch
import torch.nn as nn
import torch.nn.init as init
@@ -88,3 +89,25 @@ def seq_mask(seq_len, max_len):
seq_len = seq_len.view(-1, 1).long()  # [batch_size, 1]
seq_range = torch.arange(start=0, end=max_len, dtype=torch.long, device=seq_len.device).view(1, -1)  # [1, max_len]
return torch.gt(seq_len, seq_range)  # [batch_size, max_len]
def get_embeddings(init_embed):
"""Build the word embedding
:param init_embed: the embedding specification. It can be a tuple of (num_embeddings, embedding_dim), i.e.
the vocabulary size and the dimension of each word vector; an nn.Embedding object may also be passed,
in which case the given object is used as the embedding directly.
:return nn.Embedding embeddings:
"""
if isinstance(init_embed, tuple):
    res = nn.Embedding(num_embeddings=init_embed[0], embedding_dim=init_embed[1])
elif isinstance(init_embed, nn.Embedding):
    res = init_embed
elif isinstance(init_embed, torch.Tensor):
    res = nn.Embedding.from_pretrained(init_embed, freeze=False)
elif isinstance(init_embed, np.ndarray):
    init_embed = torch.tensor(init_embed, dtype=torch.float32)
    res = nn.Embedding.from_pretrained(init_embed, freeze=False)
else:
    raise TypeError('invalid init_embed type: {}'.format((type(init_embed))))
return res
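A quick sketch of the accepted input forms, using the module's own nn / np imports (sizes are illustrative)::

    embed_a = get_embeddings((5000, 100))               # build a new, randomly initialized embedding
    embed_b = get_embeddings(nn.Embedding(5000, 100))    # reuse an existing nn.Embedding as-is
    embed_c = get_embeddings(np.zeros((5000, 100)))      # wrap a pretrained weight matrix (kept trainable)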
@@ -42,7 +42,7 @@ train_data, dev_data = preprocess.run(train_data, dev_data)
class SELF_ATTENTION_YELP_CLASSIFICATION(BaseModel):
def __init__(self, args=None):
super(SELF_ATTENTION_YELP_CLASSIFICATION,self).__init__()
self.embedding = Embedding(len(word2index) ,embeding_size , init_emb= None )
self.embedding = Embedding((len(word2index) ,embeding_size))
self.lstm = LSTM(input_size=embeding_size, hidden_size=lstm_hidden_size, bidirectional=True)
self.attention = SelfAttention(lstm_hidden_size * 2 ,dim =attention_unit ,num_vec=attention_hops)
self.mlp = MLP(size_layer=[lstm_hidden_size * 2*attention_hops ,nfc ,class_num ])
@@ -1,5 +1,5 @@
numpy>=1.14.2
numpy
torch>=0.4.0
tensorboardX
tqdm>=4.28.1
nltk>=3.4.1
tqdm
nltk
@@ -59,7 +59,7 @@ class TestProcessor(unittest.TestCase):
def test_ModelProcessor(self):
from fastNLP.models.cnn_text_classification import CNNText
model = CNNText(100, 100, 5)
model = CNNText((100, 100), 5)
ins_list = []
for _ in range(64):
seq_len = np.random.randint(5, 30)
@@ -70,7 +70,7 @@ class TestTutorial(unittest.TestCase):
break
from fastNLP.models import CNNText
model = CNNText(vocab_size=len(vocab), embed_dim=50, num_classes=5, padding=2, dropout=0.1)
model = CNNText((len(vocab), 50), num_classes=5, padding=2, dropout=0.1)
from fastNLP import Trainer
from copy import deepcopy
@@ -145,13 +145,15 @@ class TestTutorial(unittest.TestCase):
is_input=True)
from fastNLP.models import CNNText
model = CNNText(vocab_size=len(vocab), embed_dim=50, num_classes=5, padding=2, dropout=0.1)
model = CNNText((len(vocab), 50), num_classes=5, padding=2, dropout=0.1)
from fastNLP import Trainer, CrossEntropyLoss, AccuracyMetric, Adam
from fastNLP import Trainer, CrossEntropyLoss, AccuracyMetric
trainer = Trainer(model=model,
train_data=train_data,
dev_data=dev_data,
loss=CrossEntropyLoss(),
optimizer= Adam(),
metrics=AccuracyMetric(target='label_seq')
)
trainer.train()
@@ -405,8 +407,7 @@ class TestTutorial(unittest.TestCase):
# another example: loading the CNN text classification model
from fastNLP.models import CNNText
cnn_text_model = CNNText(vocab_size=len(vocab), embed_dim=50, num_classes=5, padding=2, dropout=0.1)
cnn_text_model
cnn_text_model = CNNText((len(vocab), 50), num_classes=5, padding=2, dropout=0.1)
from fastNLP import CrossEntropyLoss
from fastNLP import Adam
@@ -421,7 +422,6 @@ class TestTutorial(unittest.TestCase):
print_every=-1,
validate_every=-1,
dev_data=dev_data,
use_cuda=False,
optimizer=Adam(lr=1e-3, weight_decay=0),
check_code_level=-1,
metric_key='acc',