
- update doc

tags/v0.4.10
yunfan, 5 years ago
commit 2c9484c274
11 changed files with 99 additions and 14 deletions
  1. fastNLP/io/__init__.py (+36, -0)
  2. fastNLP/io/config_io.py (+5, -0)
  3. fastNLP/io/dataset_loader.py (+17, -1)
  4. fastNLP/io/embed_loader.py (+5, -0)
  5. fastNLP/io/model_io.py (+5, -0)
  6. fastNLP/models/biaffine_parser.py (+2, -0)
  7. fastNLP/models/star_transformer.py (+2, -0)
  8. fastNLP/modules/encoder/lstm.py (+5, -2)
  9. fastNLP/modules/encoder/star_transformer.py (+2, -0)
  10. fastNLP/modules/encoder/variational_rnn.py (+17, -8)
  11. requirements.txt (+3, -3)

fastNLP/io/__init__.py (+36, -0)

@@ -1 +1,37 @@
"""
用于IO的模块, 具体包括:

1. 用于读入 embedding 的 :ref:`EmbedLoader <embed-loader>` 类,

2. 用于读入数据的 :ref:`DataSetLoader <dataset-loader>` 类

3. 用于读写config文件的类, 参考 :ref:`Config-io <config-io>`

4. 用于保存和载入模型的类, 参考 :ref:`Model-io <model-io>`

这些类的使用方法可以在对应module的文档下查看.
"""
from .embed_loader import EmbedLoader
from .dataset_loader import *
from .config_io import *
from .model_io import *

__all__ = [
'EmbedLoader',

'DataSetLoader',
'CSVLoader',
'JsonLoader',
'ConllLoader',
'SNLILoader',
'SSTLoader',
'PeopleDailyCorpusLoader',
'Conll2003Loader',

'ConfigLoader',
'ConfigSection',
'ConfigSaver',

'ModelLoader',
'ModelSaver',
]
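For orientation, everything listed in ``__all__`` is importable straight from ``fastNLP.io``. A minimal usage sketch, assuming the loader's default constructor arguments and a hypothetical file path::

    from fastNLP.io import CSVLoader

    loader = CSVLoader()                        # any loader exported above works the same way
    train_ds = loader.load('data/train.csv')    # hypothetical path; returns a DataSet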

fastNLP/io/config_io.py (+5, -0)

@@ -1,3 +1,8 @@
"""
.. _config-io:

For reading, processing, and saving config files
"""
import configparser
import json
import os
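The module builds on the standard-library ``configparser``, so reading values follows the usual pattern; in this sketch the file name, section names, and keys are hypothetical::

    import configparser

    cfg = configparser.ConfigParser()
    cfg.read('train.cfg')                             # hypothetical config file
    hidden_size = cfg.getint('model', 'hidden_size')  # typed accessors parse the strings
    lr = cfg.getfloat('optimizer', 'lr')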


fastNLP/io/dataset_loader.py (+17, -1)

@@ -1,3 +1,18 @@
"""
.. _dataset-loader:

The DataSetLoader API reads data in various formats and returns a `DataSet` .
The resulting `DataSet` object can be passed directly to `Trainer` or `Tester` for model training and testing

Example::

loader = SNLILoader()
train_ds = loader.load('path/to/train')
dev_ds = loader.load('path/to/dev')
test_ds = loader.load('path/to/test')

# ... do stuff
"""
import os
import json
from nltk.tree import Tree
@@ -55,8 +70,9 @@ def _uncompress(src, dst):


class DataSetLoader:
"""所有`DataSetLoader`的接口
"""

所有`DataSetLoader`的接口
"""

def load(self, path):
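A concrete loader only has to implement ``load(path)`` and return a ``DataSet``. A minimal sketch, assuming the ``DataSet``/``Instance`` API from ``fastNLP.core`` and a made-up one-sentence-per-line file format::

    from fastNLP.core import DataSet, Instance

    class LineLoader(DataSetLoader):
        """Hypothetical loader: one raw sentence per line."""
        def load(self, path):
            ds = DataSet()
            with open(path, encoding='utf-8') as f:
                for line in f:
                    ds.append(Instance(raw_sentence=line.strip()))
            return ds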


fastNLP/io/embed_loader.py (+5, -0)

@@ -1,3 +1,8 @@
"""
.. _embed-loader:

For loading pretrained embeddings; the result can be loaded directly as model parameters
"""
import os

import numpy as np
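For context, a text embedding file is typically one ``word v1 v2 ... vd`` entry per line. A stripped-down reader for that format, as a sketch rather than the actual EmbedLoader logic::

    import numpy as np

    def read_text_embeddings(path):
        # parse 'word v1 v2 ... vd' lines into a dict of float32 vectors
        vectors = {}
        with open(path, encoding='utf-8') as f:
            for line in f:
                parts = line.rstrip().split(' ')
                vectors[parts[0]] = np.asarray(parts[1:], dtype=np.float32)
        return vectors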


fastNLP/io/model_io.py (+5, -0)

@@ -1,3 +1,8 @@
"""
.. _model-io:

For loading and saving models
"""
import torch

from fastNLP.io.base_loader import BaseLoader
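PyTorch persistence is ``state_dict`` based, so saving and loading reduce to the standard pattern below (the file name is a placeholder)::

    import torch
    import torch.nn as nn

    model = nn.Linear(4, 2)                          # stand-in for any nn.Module
    torch.save(model.state_dict(), 'model.pkl')      # save parameters only
    model.load_state_dict(torch.load('model.pkl'))   # restore into a matching module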


fastNLP/models/biaffine_parser.py (+2, -0)

@@ -1,3 +1,5 @@
"""Biaffine Dependency Parser 的 Pytorch 实现.
"""
from collections import defaultdict

import numpy as np


fastNLP/models/star_transformer.py (+2, -0)

@@ -1,3 +1,5 @@
"""Star-Transformer 的 一个 Pytorch 实现.
"""
from fastNLP.modules.encoder.star_transformer import StarTransformer
from fastNLP.core.utils import seq_lens_to_masks



fastNLP/modules/encoder/lstm.py (+5, -2)

@@ -1,3 +1,6 @@
"""轻量封装的 Pytorch LSTM 模块.
可在 forward 时传入序列的长度, 自动对padding做合适的处理.
"""
import torch
import torch.nn as nn
import torch.nn.utils.rnn as rnn
@@ -35,8 +38,8 @@ class LSTM(nn.Module):
:param h0: [batch, hidden_size] initial hidden state. If ``None``, it is set to an all-one vector. Default: ``None``
:param c0: [batch, hidden_size] initial cell state. If ``None``, it is set to an all-one vector. Default: ``None``
:return (output, ht) or output: if ``get_hidden=True``, the output sequence [batch, seq_len, hidden_size*num_direction]
:and the final-step hidden state [batch, hidden_size*num_direction].
:If ``get_hidden=False``, only the output sequence is returned.
and the final-step hidden state [batch, hidden_size*num_direction].
If ``get_hidden=False``, only the output sequence is returned.
"""
if h0 is not None and c0 is not None:
hx = (h0, c0)
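The wrapper imports ``torch.nn.utils.rnn``, which is the standard machinery for length-aware LSTMs: pack the padded batch before the recurrence and unpack afterwards. A minimal sketch of that pattern, not this module's exact code::

    import torch
    import torch.nn as nn
    import torch.nn.utils.rnn as rnn

    lstm = nn.LSTM(input_size=8, hidden_size=16, batch_first=True)
    x = torch.randn(3, 5, 8)             # padded batch [batch, seq_len, input_size]
    lengths = [5, 3, 2]                  # true lengths, sorted descending

    packed = rnn.pack_padded_sequence(x, lengths, batch_first=True)
    output, (h_n, c_n) = lstm(packed)    # padding never enters the recurrence
    output, _ = rnn.pad_packed_sequence(output, batch_first=True)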


fastNLP/modules/encoder/star_transformer.py (+2, -0)

@@ -1,3 +1,5 @@
"""Star-Transformer 的encoder部分的 Pytorch 实现
"""
import torch
from torch import nn
from torch.nn import functional as F


fastNLP/modules/encoder/variational_rnn.py (+17, -8)

@@ -1,3 +1,5 @@
"""Variational RNN 的 Pytorch 实现
"""
import torch
import torch.nn as nn
from torch.nn.utils.rnn import PackedSequence, pack_padded_sequence, pad_packed_sequence
@@ -28,11 +30,11 @@ class VarRnnCellWrapper(nn.Module):
"""
:param PackedSequence input_x: [seq_len, batch_size, input_size]
:param hidden: for LSTM, tuple of (h_0, c_0), [batch_size, hidden_size]
:for other RNN, h_0, [batch_size, hidden_size]
for other RNN, h_0, [batch_size, hidden_size]
:param mask_x: [batch_size, input_size] dropout mask for input
:param mask_h: [batch_size, hidden_size] dropout mask for hidden
:return PackedSequence output: [seq_len, batch_size, hidden_size]
:hidden: for LSTM, tuple of (h_n, c_n), [batch_size, hidden_size]
hidden: for LSTM, tuple of (h_n, c_n), [batch_size, hidden_size]
for other RNN, h_n, [batch_size, hidden_size]
"""
def get_hi(hi, h0, size):
@@ -95,7 +97,7 @@ class VarRNNBase(nn.Module):
:param num_layers: number of RNN layers. Default: 1
:param bias: if ``False``, the layer does not use bias weights. Default: ``True``
:param batch_first: if ``True``, input and output ``Tensor`` shapes are
:(batch, seq, feature). Default: ``False``
(batch, seq, feature). Default: ``False``
:param input_dropout: dropout probability applied to the input. Default: 0
:param hidden_dropout: dropout probability applied to each hidden state. Default: 0
:param bidirectional: if ``True``, a bidirectional RNN is used. Default: ``False``
@@ -138,7 +140,7 @@ class VarRNNBase(nn.Module):
:param x: [batch, seq_len, input_size] input sequence
:param hx: [batch, hidden_size] initial hidden state. If ``None``, it is set to an all-one vector. Default: ``None``
:return (output, ht): the output sequence [batch, seq_len, hidden_size*num_direction]
:and the final-step hidden state [batch, hidden_size*num_direction]
and the final-step hidden state [batch, hidden_size*num_direction]
"""
is_lstm = self.is_lstm
is_packed = isinstance(x, PackedSequence)
@@ -193,7 +195,6 @@ class VarRNNBase(nn.Module):

return output, hidden


class VarLSTM(VarRNNBase):
"""Variational Dropout LSTM.

@@ -202,7 +203,7 @@ class VarLSTM(VarRNNBase):
:param num_layers: number of RNN layers. Default: 1
:param bias: if ``False``, the layer does not use bias weights. Default: ``True``
:param batch_first: if ``True``, input and output ``Tensor`` shapes are
:(batch, seq, feature). Default: ``False``
(batch, seq, feature). Default: ``False``
:param input_dropout: dropout probability applied to the input. Default: 0
:param hidden_dropout: dropout probability applied to each hidden state. Default: 0
:param bidirectional: if ``True``, a bidirectional LSTM is used. Default: ``False``
@@ -211,6 +212,9 @@ class VarLSTM(VarRNNBase):
def __init__(self, *args, **kwargs):
super(VarLSTM, self).__init__(mode="LSTM", Cell=nn.LSTMCell, *args, **kwargs)

def forward(self, x, hx=None):
return super(VarLSTM, self).forward(x, hx)


class VarRNN(VarRNNBase):
"""Variational Dropout RNN.
@@ -220,7 +224,7 @@ class VarRNN(VarRNNBase):
:param num_layers: number of RNN layers. Default: 1
:param bias: if ``False``, the layer does not use bias weights. Default: ``True``
:param batch_first: if ``True``, input and output ``Tensor`` shapes are
:(batch, seq, feature). Default: ``False``
(batch, seq, feature). Default: ``False``
:param input_dropout: dropout probability applied to the input. Default: 0
:param hidden_dropout: dropout probability applied to each hidden state. Default: 0
:param bidirectional: if ``True``, a bidirectional RNN is used. Default: ``False``
@@ -229,6 +233,8 @@ class VarRNN(VarRNNBase):
def __init__(self, *args, **kwargs):
super(VarRNN, self).__init__(mode="RNN", Cell=nn.RNNCell, *args, **kwargs)

def forward(self, x, hx=None):
return super(VarRNN, self).forward(x, hx)

class VarGRU(VarRNNBase):
"""Variational Dropout GRU.
@@ -238,7 +244,7 @@ class VarGRU(VarRNNBase):
:param num_layers: number of RNN layers. Default: 1
:param bias: if ``False``, the layer does not use bias weights. Default: ``True``
:param batch_first: if ``True``, input and output ``Tensor`` shapes are
:(batch, seq, feature). Default: ``False``
(batch, seq, feature). Default: ``False``
:param input_dropout: dropout probability applied to the input. Default: 0
:param hidden_dropout: dropout probability applied to each hidden state. Default: 0
:param bidirectional: if ``True``, a bidirectional GRU is used. Default: ``False``
@@ -247,6 +253,9 @@ class VarGRU(VarRNNBase):
def __init__(self, *args, **kwargs):
super(VarGRU, self).__init__(mode="GRU", Cell=nn.GRUCell, *args, **kwargs)

def forward(self, x, hx=None):
return super(VarGRU, self).forward(x, hx)

# if __name__ == '__main__':
# x = torch.Tensor([[1,2,3], [4,5,0], [6,0,0]])[:,:,None] * 0.1
# mask = (x != 0).float().view(3, -1)
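The ``mask_x``/``mask_h`` parameters documented above implement variational dropout: one Bernoulli mask is sampled per sequence and reused at every time step, rather than resampled per step. A minimal sketch of generating such masks (the sizes are examples)::

    import torch

    def variational_mask(batch_size, size, p):
        # one dropout mask per sequence, reused across all time steps;
        # inverted-dropout scaling keeps activation expectations unchanged
        keep = torch.bernoulli(torch.full((batch_size, size), 1 - p))
        return keep / (1 - p)

    mask_x = variational_mask(32, 100, p=0.3)   # multiplies the input at every step
    mask_h = variational_mask(32, 256, p=0.3)   # multiplies the hidden state at every step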


requirements.txt (+3, -3)

@@ -1,5 +1,5 @@
numpy>=1.14.2
numpy
torch>=0.4.0
tensorboardX
tqdm>=4.28.1
nltk>=3.4.1
tqdm
nltk
