Browse Source

Merge remote-tracking branch 'private/dev' into dev

# Conflicts:
#	fastNLP/api/api.py
#	fastNLP/modules/encoder/variational_rnn.py
tags/v0.3.0^2
yunfan 5 years ago
parent
commit
62a7556a04
37 changed files with 32609 additions and 359 deletions
  1. +2
    -1
      .travis.yml
  2. +3
    -1
      README.md
  3. +60
    -24
      fastNLP/api/api.py
  4. +2
    -2
      fastNLP/api/processor.py
  5. +6
    -1
      fastNLP/core/batch.py
  6. +112
    -31
      fastNLP/core/callback.py
  7. +59
    -81
      fastNLP/core/trainer.py
  8. +27
    -0
      fastNLP/core/utils.py
  9. +2
    -2
      fastNLP/io/dataset_loader.py
  10. +6
    -6
      fastNLP/modules/aggregator/attention.py
  11. +10
    -9
      fastNLP/modules/aggregator/self_attention.py
  12. +16
    -15
      fastNLP/modules/decoder/CRF.py
  13. +11
    -9
      fastNLP/modules/decoder/MLP.py
  14. +2
    -2
      fastNLP/modules/dropout.py
  15. +16
    -18
      fastNLP/modules/encoder/char_embedding.py
  16. +17
    -7
      fastNLP/modules/encoder/conv.py
  17. +15
    -5
      fastNLP/modules/encoder/conv_maxpool.py
  18. +6
    -9
      fastNLP/modules/encoder/embedding.py
  19. +5
    -8
      fastNLP/modules/encoder/linear.py
  20. +9
    -7
      fastNLP/modules/encoder/lstm.py
  21. +59
    -57
      fastNLP/modules/encoder/masked_rnn.py
  22. +3
    -6
      fastNLP/modules/encoder/transformer.py
  23. +9
    -3
      fastNLP/modules/encoder/variational_rnn.py
  24. +23
    -34
      fastNLP/modules/other_modules.py
  25. +2
    -2
      fastNLP/modules/utils.py
  26. +2
    -2
      reproduction/chinese_word_segment/models/cws_model.py
  27. +11
    -2
      reproduction/chinese_word_segment/process/cws_processor.py
  28. +14
    -13
      reproduction/chinese_word_segment/train_context.py
  29. +1
    -1
      test/core/test_dataset.py
  30. +2
    -1
      test/core/test_metrics.py
  31. +1190
    -0
      tutorials/fastnlp_advanced_tutorial/advance_tutorial.ipynb
  32. +8
    -0
      tutorials/fastnlp_advanced_tutorial/data/config
  33. +100
    -0
      tutorials/fastnlp_advanced_tutorial/hypothesis
  34. +100
    -0
      tutorials/fastnlp_advanced_tutorial/label
  35. +100
    -0
      tutorials/fastnlp_advanced_tutorial/premise
  36. +77
    -0
      tutorials/fastnlp_advanced_tutorial/tutorial_sample_dataset.csv
  37. +30522
    -0
      tutorials/fastnlp_advanced_tutorial/vocab.txt

+ 2
- 1
.travis.yml View File

@@ -4,7 +4,8 @@ python:
# command to install dependencies # command to install dependencies
install: install:
- pip install --quiet -r requirements.txt - pip install --quiet -r requirements.txt
- pip install pytest pytest-cov
- pip install "pytest>=3.6"
- pip install pytest-cov
# command to run tests # command to run tests
script: script:
- pytest --cov=./ - pytest --cov=./


+ 3
- 1
README.md View File

@@ -48,8 +48,10 @@ For example:
## Resources ## Resources


- [Documentation](https://fastnlp.readthedocs.io/en/latest/) - [Documentation](https://fastnlp.readthedocs.io/en/latest/)
- [Tutorials](https://github.com/fastnlp/fastNLP/tree/master/tutorials)
- [Source Code](https://github.com/fastnlp/fastNLP) - [Source Code](https://github.com/fastnlp/fastNLP)



## Installation ## Installation
Run the following commands to install fastNLP package. Run the following commands to install fastNLP package.
```shell ```shell
@@ -70,7 +72,7 @@ pip install fastNLP
</tr> </tr>
<tr> <tr>
<td><b> fastNLP.core </b></td> <td><b> fastNLP.core </b></td>
<td> data representation & train/test presedure </td>
<td> data representation & train/test procedure </td>
</tr> </tr>
<tr> <tr>
<td><b> fastNLP.models </b></td> <td><b> fastNLP.models </b></td>


+ 60
- 24
fastNLP/api/api.py View File

@@ -13,9 +13,6 @@ from reproduction.chinese_word_segment.cws_io.cws_reader import ConllCWSReader
from reproduction.pos_tag_model.pos_reader import ZhConllPOSReader from reproduction.pos_tag_model.pos_reader import ZhConllPOSReader
from reproduction.Biaffine_parser.util import ConllxDataLoader, add_seg_tag from reproduction.Biaffine_parser.util import ConllxDataLoader, add_seg_tag
from fastNLP.core.instance import Instance from fastNLP.core.instance import Instance
from fastNLP.core.sampler import SequentialSampler
from fastNLP.core.batch import Batch
from reproduction.chinese_word_segment.utils import calculate_pre_rec_f1
from fastNLP.api.pipeline import Pipeline from fastNLP.api.pipeline import Pipeline
from fastNLP.core.metrics import SpanFPreRecMetric from fastNLP.core.metrics import SpanFPreRecMetric
from fastNLP.api.processor import IndexerProcessor from fastNLP.api.processor import IndexerProcessor
@@ -23,10 +20,9 @@ from fastNLP.api.processor import IndexerProcessor


# TODO add pretrain urls # TODO add pretrain urls
model_urls = { model_urls = {
'cws': "http://123.206.98.91:8888/download/cws_crf_1_11-457fc899.pkl"
} }



class API: class API:
def __init__(self): def __init__(self):
self.pipeline = None self.pipeline = None
@@ -139,6 +135,12 @@ class POS(API):


class CWS(API): class CWS(API):
def __init__(self, model_path=None, device='cpu'): def __init__(self, model_path=None, device='cpu'):
"""
中文分词高级接口。

:param model_path: 当model_path为None,使用默认位置的model。如果默认位置不存在,则自动下载模型
:param device: str,可以为'cpu', 'cuda'或'cuda:0'等。会将模型load到相应device进行推断。
"""
super(CWS, self).__init__() super(CWS, self).__init__()
if model_path is None: if model_path is None:
model_path = model_urls['cws'] model_path = model_urls['cws']
@@ -146,7 +148,13 @@ class CWS(API):
self.load(model_path, device) self.load(model_path, device)


def predict(self, content): def predict(self, content):
"""
分词接口。


:param content: str或List[str], 例如: "中文分词很重要!", 返回的结果是"中文 分词 很 重要 !"。 如果传入的为List[str],比如
[ "中文分词很重要!", ...], 返回的结果["中文 分词 很 重要 !", ...]。
:return: str或List[str], 根据输入的的类型决定。
"""
if not hasattr(self, 'pipeline'): if not hasattr(self, 'pipeline'):
raise ValueError("You have to load model first.") raise ValueError("You have to load model first.")


@@ -164,17 +172,35 @@ class CWS(API):
# 3. 使用pipeline # 3. 使用pipeline
self.pipeline(dataset) self.pipeline(dataset)


output = dataset['output'].content
output = dataset.get_field('output').content
if isinstance(content, str): if isinstance(content, str):
return output[0] return output[0]
elif isinstance(content, list): elif isinstance(content, list):
return output return output


def test(self, filepath): def test(self, filepath):

tag_proc = self._dict['tag_indexer']
"""
传入一个分词文件路径,返回该数据集上分词f1, precision, recall。
分词文件应该为:
1 编者按 编者按 NN O 11 nmod:topic
2 : : PU O 11 punct
3 7月 7月 NT DATE 4 compound:nn
4 12日 12日 NT DATE 11 nmod:tmod
5 , , PU O 11 punct

1 这 这 DT O 3 det
2 款 款 M O 1 mark:clf
3 飞行 飞行 NN O 8 nsubj
4 从 从 P O 5 case
5 外型 外型 NN O 8 nmod:prep
以空行分割两个句子,有内容的每行有7列。

:param filepath: str, 文件路径路径。
:return: float, float, float. 分别f1, precision, recall.
"""
tag_proc = self._dict['tag_proc']
cws_model = self.pipeline.pipeline[-2].model cws_model = self.pipeline.pipeline[-2].model
pipeline = self.pipeline.pipeline[:5]
pipeline = self.pipeline.pipeline[:-2]


pipeline.insert(1, tag_proc) pipeline.insert(1, tag_proc)
pp = Pipeline(pipeline) pp = Pipeline(pipeline)
@@ -185,12 +211,16 @@ class CWS(API):
te_dataset = reader.load(filepath) te_dataset = reader.load(filepath)
pp(te_dataset) pp(te_dataset)


batch_size = 64
te_batcher = Batch(te_dataset, batch_size, SequentialSampler(), use_cuda=False)
pre, rec, f1 = calculate_pre_rec_f1(cws_model, te_batcher, type='bmes')
f1 = round(f1 * 100, 2)
pre = round(pre * 100, 2)
rec = round(rec * 100, 2)
from fastNLP.core.tester import Tester
from fastNLP.core.metrics import BMESF1PreRecMetric

tester = Tester(data=te_dataset, model=cws_model, metrics=BMESF1PreRecMetric(target='target'), batch_size=64,
verbose=0)
eval_res = tester.test()

f1 = eval_res['BMESF1PreRecMetric']['f']
pre = eval_res['BMESF1PreRecMetric']['pre']
rec = eval_res['BMESF1PreRecMetric']['rec']
# print("f1:{:.2f}, pre:{:.2f}, rec:{:.2f}".format(f1, pre, rec)) # print("f1:{:.2f}, pre:{:.2f}, rec:{:.2f}".format(f1, pre, rec))


return f1, pre, rec return f1, pre, rec
@@ -287,7 +317,7 @@ class Analyzer:


def test(self, filepath): def test(self, filepath):
output_dict = {} output_dict = {}
if self.seg:
if self.cws:
seg_output = self.cws.test(filepath) seg_output = self.cws.test(filepath)
output_dict['seg'] = seg_output output_dict['seg'] = seg_output
if self.pos: if self.pos:
@@ -309,18 +339,24 @@ if __name__ == "__main__":
# print(pos.test("/home/zyfeng/data/sample.conllx")) # print(pos.test("/home/zyfeng/data/sample.conllx"))
# print(pos.predict(s)) # print(pos.predict(s))


# cws_model_path = '../../reproduction/chinese_word_segment/models/cws_crf.pkl'
# cws = CWS(device='cpu')
# s = ['本品是一个抗酸抗胆汁的胃黏膜保护剂' ,
# '这款飞行从外型上来看酷似电影中的太空飞行器,据英国方面介绍,可以实现洲际远程打击。',
# '那么这款无人机到底有多厉害?']
# print(cws.test('/Users/yh/Desktop/test_data/cws_test.conll'))
# print(cws.predict(s))

# cws_model_path = '../../reproduction/chinese_word_segment/models/cws_crf_1_11.pkl'
cws = CWS(device='cpu')
s = ['本品是一个抗酸抗胆汁的胃黏膜保护剂' ,
'这款飞行从外型上来看酷似电影中的太空飞行器,据英国方面介绍,可以实现洲际远程打击。',
parser_path = '/home/yfshao/workdir/fastnlp/reproduction/Biaffine_parser/pipe.pkl' parser_path = '/home/yfshao/workdir/fastnlp/reproduction/Biaffine_parser/pipe.pkl'
parser = Parser(parser_path, device='cpu') parser = Parser(parser_path, device='cpu')
# print(parser.test('/Users/yh/Desktop/test_data/parser_test2.conll')) # print(parser.test('/Users/yh/Desktop/test_data/parser_test2.conll'))
s = ['编者按:7月12日,英国航空航天系统公司公布了该公司研制的第一款高科技隐形无人机雷电之神。', s = ['编者按:7月12日,英国航空航天系统公司公布了该公司研制的第一款高科技隐形无人机雷电之神。',
'这款飞行从外型上来看酷似电影中的太空飞行器,据英国方面介绍,可以实现洲际远程打击。', '这款飞行从外型上来看酷似电影中的太空飞行器,据英国方面介绍,可以实现洲际远程打击。',
'那么这款无人机到底有多厉害?'] '那么这款无人机到底有多厉害?']
print(cws.test('/home/hyan/ctb3/test.conllx'))
print(cws.predict(s))
print(cws.predict('本品是一个抗酸抗胆汁的胃黏膜保护剂'))

# parser = Parser(device='cpu')
# print(parser.test('/Users/yh/Desktop/test_data/parser_test2.conll'))
# s = ['编者按:7月12日,英国航空航天系统公司公布了该公司研制的第一款高科技隐形无人机雷电之神。',
# '这款飞行从外型上来看酷似电影中的太空飞行器,据英国方面介绍,可以实现洲际远程打击。',
# '那么这款无人机到底有多厉害?']
# print(parser.predict(s))
print(parser.predict(s)) print(parser.predict(s))

+ 2
- 2
fastNLP/api/processor.py View File

@@ -270,8 +270,8 @@ class ModelProcessor(Processor):
for idx, seq_len in enumerate(seq_lens): for idx, seq_len in enumerate(seq_lens):
tmp_batch.append(value[idx, :seq_len]) tmp_batch.append(value[idx, :seq_len])
batch_output[key].extend(tmp_batch) batch_output[key].extend(tmp_batch)
batch_output[self.seq_len_field_name].extend(seq_lens)
if not self.seq_len_field_name in prediction:
batch_output[self.seq_len_field_name].extend(seq_lens)


# TODO 当前的实现会导致之后的processor需要知道model输出的output的key是什么 # TODO 当前的实现会导致之后的processor需要知道model输出的output的key是什么
for field_name, fields in batch_output.items(): for field_name, fields in batch_output.items():


+ 6
- 1
fastNLP/core/batch.py View File

@@ -26,7 +26,8 @@ class Batch(object):
self.as_numpy = as_numpy self.as_numpy = as_numpy
self.idx_list = None self.idx_list = None
self.curidx = 0 self.curidx = 0
self.num_batches = len(dataset)//batch_size + int(len(dataset)%batch_size!=0)
self.num_batches = len(dataset) // batch_size + int(len(dataset) % batch_size != 0)
self.cur_batch_indices = None


def __iter__(self): def __iter__(self):
self.idx_list = self.sampler(self.dataset) self.idx_list = self.sampler(self.dataset)
@@ -42,6 +43,7 @@ class Batch(object):
batch_x, batch_y = {}, {} batch_x, batch_y = {}, {}


indices = self.idx_list[self.curidx:endidx] indices = self.idx_list[self.curidx:endidx]
self.cur_batch_indices = indices


for field_name, field in self.dataset.get_all_fields().items(): for field_name, field in self.dataset.get_all_fields().items():
if field.is_target or field.is_input: if field.is_target or field.is_input:
@@ -60,6 +62,9 @@ class Batch(object):
def __len__(self): def __len__(self):
return self.num_batches return self.num_batches


def get_batch_indices(self):
return self.cur_batch_indices



def to_tensor(batch, dtype): def to_tensor(batch, dtype):
if dtype in (int, np.int8, np.int16, np.int32, np.int64): if dtype in (int, np.int8, np.int16, np.int32, np.int64):


+ 112
- 31
fastNLP/core/callback.py View File

@@ -12,34 +12,72 @@ class Callback(object):
# before the main training loop # before the main training loop
pass pass


def before_epoch(self):
def before_epoch(self, cur_epoch, total_epoch):
# at the beginning of each epoch # at the beginning of each epoch
pass pass


def before_batch(self):
def before_batch(self, batch_x, batch_y, indices):
# at the beginning of each step/mini-batch # at the beginning of each step/mini-batch
pass pass


def before_loss(self):
def before_loss(self, batch_y, predict_y):
# after data_forward, and before loss computation # after data_forward, and before loss computation
pass pass


def before_backward(self):
def before_backward(self, loss, model):
# after loss computation, and before gradient backward # after loss computation, and before gradient backward
pass pass


def after_batch(self):
def after_backward(self, model):
pass

def after_step(self, optimizer):
pass

def after_batch(self, *args):
# at the end of each step/mini-batch # at the end of each step/mini-batch
pass pass


def after_epoch(self):
# at the end of each epoch
def after_valid(self, eval_result, metric_key, optimizer):
"""
每次执行验证机的evaluation后会调用。传入eval_result

:param eval_result: Dict[str: Dict[str: float]], evaluation的结果
:param metric_key: str
:param optimizer:
:return:
"""
pass pass


def after_train(self):
# after training loop
def after_epoch(self, cur_epoch, n_epoch, optimizer):
"""
每个epoch结束将会调用该方法

:param cur_epoch: int, 当前的batch。从1开始。
:param n_epoch: int, 总的batch数
:param optimizer: 传入Trainer的optimizer。
:return:
"""
pass pass


def after_train(self, model):
"""
训练结束,调用该方法

:param model: nn.Module, 传入Trainer的模型
:return:
"""
pass

def on_exception(self, exception, model, indices):
"""
当训练过程出现异常,会触发该方法
:param exception: 某种类型的Exception,比如KeyboardInterrupt等
:param model: 传入Trainer的模型
:param indices: 当前batch的index
:return:
"""
pass


def transfer(func): def transfer(func):
"""装饰器,将对CallbackManager的调用转发到各个Callback子类. """装饰器,将对CallbackManager的调用转发到各个Callback子类.
@@ -48,12 +86,12 @@ def transfer(func):
:return: :return:
""" """


def wrapper(manager):
def wrapper(manager, *arg):
returns = [] returns = []
for callback in manager.callbacks: for callback in manager.callbacks:
for env_name, env_value in manager.env.items(): for env_name, env_value in manager.env.items():
setattr(callback, env_name, env_value) setattr(callback, env_name, env_value)
returns.append(getattr(callback, func.__name__)())
returns.append(getattr(callback, func.__name__)(*arg))
return returns return returns


return wrapper return wrapper
@@ -91,19 +129,27 @@ class CallbackManager(Callback):
pass pass


@transfer @transfer
def before_epoch(self):
def before_epoch(self, cur_epoch, total_epoch):
pass pass


@transfer @transfer
def before_batch(self):
def before_batch(self, batch_x, batch_y, indices):
pass pass


@transfer @transfer
def before_loss(self):
def before_loss(self, batch_y, predict_y):
pass pass


@transfer @transfer
def before_backward(self):
def before_backward(self, loss, model):
pass

@transfer
def after_backward(self, model):
pass

@transfer
def after_step(self, optimizer):
pass pass


@transfer @transfer
@@ -111,51 +157,86 @@ class CallbackManager(Callback):
pass pass


@transfer @transfer
def after_epoch(self):
def after_valid(self, eval_result, metric_key, optimizer):
pass pass


@transfer @transfer
def after_train(self):
def after_epoch(self, cur_epoch, n_epoch, optimizer):
pass

@transfer
def after_train(self, model):
pass

@transfer
def on_exception(self, exception, model, indices):
pass pass




class DummyCallback(Callback): class DummyCallback(Callback):
def before_train(self):
print("before train!!!")
print(self.n_epoch)
def before_train(self, *arg):
print(arg)


def after_epoch(self):
print("after epoch!!!")
return 12
def after_epoch(self, cur_epoch, n_epoch, optimizer):
print(cur_epoch, n_epoch, optimizer)




class EchoCallback(Callback): class EchoCallback(Callback):
def before_train(self): def before_train(self):
print("before_train") print("before_train")


def before_epoch(self):
def before_epoch(self, cur_epoch, total_epoch):
print("before_epoch") print("before_epoch")


def before_batch(self):
def before_batch(self, batch_x, batch_y, indices):
print("before_batch") print("before_batch")


def before_loss(self):
def before_loss(self, batch_y, predict_y):
print("before_loss") print("before_loss")


def before_backward(self):
def before_backward(self, loss, model):
print("before_backward") print("before_backward")


def after_batch(self): def after_batch(self):
print("after_batch") print("after_batch")


def after_epoch(self):
def after_epoch(self, cur_epoch, n_epoch, optimizer):
print("after_epoch") print("after_epoch")


def after_train(self):
def after_train(self, model):
print("after_train") print("after_train")


class GradientClipCallback(Callback):
    """Clip gradients right after each backward pass (hooked on ``after_backward``)."""

    def __init__(self, parameters=None, clip_value=1, clip_type='norm'):
        """
        :param parameters: None, torch.Tensor or an iterable of torch.Tensor, usually obtained from
            ``model.parameters()``. If None, all parameters of the model passed to ``after_backward``
            are clipped.
        :param clip_value: float, the clipping threshold. Should be positive.
        :param clip_type: str, either 'norm' or 'value'.
            (1) 'norm': rescale the gradients so that their total norm does not exceed clip_value.
            (2) 'value': clamp every gradient element into [-clip_value, clip_value]; elements below
                -clip_value become -clip_value, elements above clip_value become clip_value.
        :raises ValueError: if clip_type is neither 'norm' nor 'value'.
        """
        super().__init__()

        import torch
        from torch import nn
        if clip_type == 'norm':
            self.clip_fun = nn.utils.clip_grad_norm_
        elif clip_type == 'value':
            self.clip_fun = nn.utils.clip_grad_value_
        else:
            raise ValueError("Only supports `norm` or `value` right now.")
        if parameters is not None and not isinstance(parameters, torch.Tensor):
            # model.parameters() returns a one-shot generator; keep a list so the same
            # parameters can be clipped again on every training step.
            parameters = list(parameters)
        self.parameters = parameters
        self.clip_value = clip_value

    def after_backward(self, model):
        """
        Clip the gradients of the configured parameters.

        :param model: the model being trained; its parameters are clipped when no explicit
            ``parameters`` were given to the constructor.
        """
        if self.parameters is None:
            self.clip_fun(model.parameters(), self.clip_value)
        else:
            self.clip_fun(self.parameters, self.clip_value)




if __name__ == "__main__": if __name__ == "__main__":
manager = CallbackManager(env={"n_epoch": 3}, callbacks=[DummyCallback(), DummyCallback()]) manager = CallbackManager(env={"n_epoch": 3}, callbacks=[DummyCallback(), DummyCallback()])
manager.before_train()
print(manager.after_epoch())
manager.before_train(10, 11, 12)
# print(manager.after_epoch())

+ 59
- 81
fastNLP/core/trainer.py View File

@@ -7,7 +7,11 @@ import numpy as np
import torch import torch
from tensorboardX import SummaryWriter from tensorboardX import SummaryWriter
from torch import nn from torch import nn
from tqdm.autonotebook import tqdm

try:
from tqdm.autonotebook import tqdm
except:
from fastNLP.core.utils import pseudo_tqdm as tqdm


from fastNLP.core.batch import Batch from fastNLP.core.batch import Batch
from fastNLP.core.callback import CallbackManager from fastNLP.core.callback import CallbackManager
@@ -108,7 +112,7 @@ class Trainer(object):
self.use_cuda = bool(use_cuda) self.use_cuda = bool(use_cuda)
self.save_path = save_path self.save_path = save_path
self.print_every = int(print_every) self.print_every = int(print_every)
self.validate_every = int(validate_every)
self.validate_every = int(validate_every) if validate_every!=0 else -1
self.best_metric_indicator = None self.best_metric_indicator = None
self.sampler = sampler self.sampler = sampler
self.callback_manager = CallbackManager(env={"trainer": self}, callbacks=callbacks) self.callback_manager = CallbackManager(env={"trainer": self}, callbacks=callbacks)
@@ -119,11 +123,7 @@ class Trainer(object):
self.optimizer = optimizer.construct_from_pytorch(self.model.parameters()) self.optimizer = optimizer.construct_from_pytorch(self.model.parameters())


self.use_tqdm = use_tqdm self.use_tqdm = use_tqdm
if self.use_tqdm:
tester_verbose = 0
self.print_every = abs(self.print_every)
else:
tester_verbose = 1
self.print_every = abs(self.print_every)


if self.dev_data is not None: if self.dev_data is not None:
self.tester = Tester(model=self.model, self.tester = Tester(model=self.model,
@@ -131,7 +131,7 @@ class Trainer(object):
metrics=self.metrics, metrics=self.metrics,
batch_size=self.batch_size, batch_size=self.batch_size,
use_cuda=self.use_cuda, use_cuda=self.use_cuda,
verbose=tester_verbose)
verbose=0)


self.step = 0 self.step = 0
self.start_time = None # start timestamp self.start_time = None # start timestamp
@@ -199,11 +199,8 @@ class Trainer(object):
self._summary_writer = SummaryWriter(path) self._summary_writer = SummaryWriter(path)


self.callback_manager.before_train() self.callback_manager.before_train()
if self.use_tqdm:
self._tqdm_train()
else:
self._print_train()
self.callback_manager.after_train()
self._train()
self.callback_manager.after_train(self.model)


if self.dev_data is not None: if self.dev_data is not None:
print("\nIn Epoch:{}/Step:{}, got best dev performance:".format(self.best_dev_epoch, self.best_dev_step) + print("\nIn Epoch:{}/Step:{}, got best dev performance:".format(self.best_dev_epoch, self.best_dev_step) +
@@ -225,28 +222,43 @@ class Trainer(object):


return results return results


def _tqdm_train(self):
def _train(self):
if not self.use_tqdm:
from fastNLP.core.utils import pseudo_tqdm as inner_tqdm
else:
inner_tqdm = tqdm
self.step = 0 self.step = 0
data_iterator = Batch(self.train_data, batch_size=self.batch_size, sampler=self.sampler,
as_numpy=False)
total_steps = data_iterator.num_batches*self.n_epochs
with tqdm(total=total_steps, postfix='loss:{0:<6.5f}', leave=False, dynamic_ncols=True) as pbar:
start = time.time()
data_iterator = Batch(self.train_data, batch_size=self.batch_size, sampler=self.sampler, as_numpy=False)
total_steps = data_iterator.num_batches * self.n_epochs
with inner_tqdm(total=total_steps, postfix='loss:{0:<6.5f}', leave=False, dynamic_ncols=True) as pbar:
avg_loss = 0 avg_loss = 0
for epoch in range(1, self.n_epochs+1): for epoch in range(1, self.n_epochs+1):
pbar.set_description_str(desc="Epoch {}/{}".format(epoch, self.n_epochs)) pbar.set_description_str(desc="Epoch {}/{}".format(epoch, self.n_epochs))
self.callback_manager.before_epoch()
# early stopping
self.callback_manager.before_epoch(epoch, self.n_epochs)
for batch_x, batch_y in data_iterator: for batch_x, batch_y in data_iterator:
self.callback_manager.before_batch()
indices = data_iterator.get_batch_indices()
# negative sampling; replace unknown; re-weight batch_y
self.callback_manager.before_batch(batch_x, batch_y, indices)
_move_dict_value_to_device(batch_x, batch_y, device=self._model_device) _move_dict_value_to_device(batch_x, batch_y, device=self._model_device)
prediction = self._data_forward(self.model, batch_x) prediction = self._data_forward(self.model, batch_x)


self.callback_manager.before_loss()
# edit prediction
self.callback_manager.before_loss(batch_y, prediction)
loss = self._compute_loss(prediction, batch_y) loss = self._compute_loss(prediction, batch_y)
avg_loss += loss.item() avg_loss += loss.item()


self.callback_manager.before_backward()
# Is loss NaN or inf? requires_grad = False
self.callback_manager.before_backward(loss, self.model)
self._grad_backward(loss) self._grad_backward(loss)
# gradient clipping
self.callback_manager.after_backward(self.model)

self._update() self._update()
# lr scheduler; lr_finder; one_cycle
self.callback_manager.after_step(self.optimizer)

self._summary_writer.add_scalar("loss", loss.item(), global_step=self.step) self._summary_writer.add_scalar("loss", loss.item(), global_step=self.step)
for name, param in self.model.named_parameters(): for name, param in self.model.named_parameters():
if param.requires_grad: if param.requires_grad:
@@ -254,77 +266,41 @@ class Trainer(object):
# self._summary_writer.add_scalar(name + "_std", param.std(), global_step=self.step) # self._summary_writer.add_scalar(name + "_std", param.std(), global_step=self.step)
# self._summary_writer.add_scalar(name + "_grad_sum", param.sum(), global_step=self.step) # self._summary_writer.add_scalar(name + "_grad_sum", param.sum(), global_step=self.step)
if (self.step+1) % self.print_every == 0: if (self.step+1) % self.print_every == 0:
pbar.set_postfix_str("loss:{0:<6.5f}".format(avg_loss / self.print_every))
if self.use_tqdm:
print_output = "loss:{0:<6.5f}".format(avg_loss / self.print_every)
pbar.update(self.print_every)
else:
end = time.time()
diff = timedelta(seconds=round(end - start))
print_output = "[epoch: {:>3} step: {:>4}] train loss: {:>4.6} time: {}".format(
epoch, self.step, avg_loss, diff)
pbar.set_postfix_str(print_output)
avg_loss = 0 avg_loss = 0
pbar.update(self.print_every)
self.step += 1 self.step += 1
# do nothing
self.callback_manager.after_batch() self.callback_manager.after_batch()


if self.validate_every > 0 and self.step % self.validate_every == 0 \
if ((self.validate_every > 0 and self.step % self.validate_every == 0) or
(self.validate_every < 0 and self.step % len(data_iterator)) == 0) \
and self.dev_data is not None: and self.dev_data is not None:
eval_res = self._do_validation(epoch=epoch, step=self.step) eval_res = self._do_validation(epoch=epoch, step=self.step)
eval_str = "Epoch {}/{}. Step:{}/{}. ".format(epoch, self.n_epochs, self.step, total_steps) + \
eval_str = "Evaluation at Epoch {}/{}. Step:{}/{}. ".format(epoch, self.n_epochs, self.step,
total_steps) + \
self.tester._format_eval_results(eval_res) self.tester._format_eval_results(eval_res)
pbar.write(eval_str) pbar.write(eval_str)
if self.validate_every < 0 and self.dev_data:
eval_res = self._do_validation(epoch=epoch, step=self.step)
eval_str = "Epoch {}/{}. Step:{}/{}. ".format(epoch, self.n_epochs, self.step, total_steps) + \
self.tester._format_eval_results(eval_res)
pbar.write(eval_str)
if epoch!=self.n_epochs:

# if self.validate_every < 0 and self.dev_data:
# eval_res = self._do_validation(epoch=epoch, step=self.step)
# eval_str = "Epoch {}/{}. Step:{}/{}. ".format(epoch, self.n_epochs, self.step, total_steps) + \
# self.tester._format_eval_results(eval_res)
# pbar.write(eval_str)
if epoch != self.n_epochs:
data_iterator = Batch(self.train_data, batch_size=self.batch_size, sampler=self.sampler, data_iterator = Batch(self.train_data, batch_size=self.batch_size, sampler=self.sampler,
as_numpy=False) as_numpy=False)
self.callback_manager.after_epoch()
# lr decay; early stopping
self.callback_manager.after_epoch(epoch, self.n_epochs, self.optimizer)
pbar.close() pbar.close()


def _print_train(self):
epoch = 1
start = time.time()
while epoch <= self.n_epochs:
self.callback_manager.before_epoch()

data_iterator = Batch(self.train_data, batch_size=self.batch_size, sampler=self.sampler,
as_numpy=False)

for batch_x, batch_y in data_iterator:
self.callback_manager.before_batch()
# TODO 这里可能会遇到问题,万一用户在model内部修改了prediction的device就会有问题
_move_dict_value_to_device(batch_x, batch_y, device=self._model_device)
prediction = self._data_forward(self.model, batch_x)

self.callback_manager.before_loss()
loss = self._compute_loss(prediction, batch_y)

self.callback_manager.before_backward()
self._grad_backward(loss)
self._update()

self._summary_writer.add_scalar("loss", loss.item(), global_step=self.step)
for name, param in self.model.named_parameters():
if param.requires_grad:
self._summary_writer.add_scalar(name + "_mean", param.mean(), global_step=self.step)
# self._summary_writer.add_scalar(name + "_std", param.std(), global_step=self.step)
# self._summary_writer.add_scalar(name + "_grad_sum", param.sum(), global_step=self.step)
if self.print_every > 0 and self.step % self.print_every == 0:
end = time.time()
diff = timedelta(seconds=round(end - start))
print_output = "[epoch: {:>3} step: {:>4}] train loss: {:>4.6} time: {}".format(
epoch, self.step, loss.data, diff)
print(print_output)

if (self.validate_every > 0 and self.step % self.validate_every == 0 and
self.dev_data is not None):
self._do_validation(epoch=epoch, step=self.step)

self.step += 1
self.callback_manager.after_batch()

# validate_every override validation at end of epochs
if self.dev_data and self.validate_every <= 0:
self._do_validation(epoch=epoch, step=self.step)
epoch += 1
self.callback_manager.after_epoch()

def _do_validation(self, epoch, step): def _do_validation(self, epoch, step):
res = self.tester.test() res = self.tester.test()
for name, metric in res.items(): for name, metric in res.items():
@@ -340,6 +316,8 @@ class Trainer(object):
self.best_dev_perf = res self.best_dev_perf = res
self.best_dev_epoch = epoch self.best_dev_epoch = epoch
self.best_dev_step = step self.best_dev_step = step
# get validation results; adjust optimizer
self.callback_manager.after_valid(res, self.metric_key, self.optimizer)
return res return res


def _mode(self, model, is_test=False): def _mode(self, model, is_test=False):


+ 27
- 0
fastNLP/core/utils.py View File

@@ -430,3 +430,30 @@ def seq_mask(seq_len, max_len):
seq_len = seq_len.view(-1, 1).long() # [batch_size, 1] seq_len = seq_len.view(-1, 1).long() # [batch_size, 1]
seq_range = torch.arange(start=0, end=max_len, dtype=torch.long, device=seq_len.device).view(1, -1) # [1, max_len] seq_range = torch.arange(start=0, end=max_len, dtype=torch.long, device=seq_len.device).view(1, -1) # [1, max_len]
return torch.gt(seq_len, seq_range) # [batch_size, max_len] return torch.gt(seq_len, seq_range) # [batch_size, max_len]


class pseudo_tqdm:
    """
    Stand-in for tqdm, used when tqdm cannot be imported or when the Trainer is run with
    use_tqdm set to False.

    ``write`` and ``set_postfix_str`` print their argument; any other (non-dunder) method that is
    looked up on an instance is a no-op that accepts arbitrary arguments and returns None.
    """

    def __init__(self, *args, **kwargs):
        # Accept and ignore whatever would have been passed to tqdm(...).
        pass

    def write(self, info):
        """Print ``info``."""
        print(info)

    def set_postfix_str(self, info):
        """Print ``info``."""
        print(info)

    def __getattr__(self, item):
        # Only reached when normal attribute lookup fails. Do not fabricate special methods:
        # otherwise hasattr(obj, '__iter__') is True and copy.deepcopy(obj) returns None.
        if item.startswith('__') and item.endswith('__'):
            raise AttributeError(item)

        def pass_func(*args, **kwargs):
            pass

        return pass_func

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Nothing to release. Returning None lets any exception from the with-body propagate.
        return None

+ 2
- 2
fastNLP/io/dataset_loader.py View File

@@ -254,7 +254,7 @@ class TokenizeDataSetLoader(DataSetLoader):




class ClassDataSetLoader(DataSetLoader): class ClassDataSetLoader(DataSetLoader):
"""Loader for classification data sets"""
"""Loader for a dummy classification data set"""


def __init__(self): def __init__(self):
super(ClassDataSetLoader, self).__init__() super(ClassDataSetLoader, self).__init__()
@@ -304,7 +304,7 @@ class ConllLoader(DataSetLoader):
@staticmethod @staticmethod
def parse(lines): def parse(lines):
""" """
:param list lines:a list containing all lines in a conll file.
:param list lines: a list containing all lines in a conll file.
:return: a 3D list :return: a 3D list
""" """
sentences = list() sentences = list()


+ 6
- 6
fastNLP/modules/aggregator/attention.py View File

@@ -1,12 +1,13 @@
import math

import torch import torch
from torch import nn
import torch.nn.functional as F import torch.nn.functional as F
import math
from torch import nn

from fastNLP.modules.utils import mask_softmax from fastNLP.modules.utils import mask_softmax




class Attention(torch.nn.Module): class Attention(torch.nn.Module):

def __init__(self, normalize=False): def __init__(self, normalize=False):
super(Attention, self).__init__() super(Attention, self).__init__()
self.normalize = normalize self.normalize = normalize
@@ -20,9 +21,9 @@ class Attention(torch.nn.Module):
def _atten_forward(self, query, memory): def _atten_forward(self, query, memory):
raise NotImplementedError raise NotImplementedError



class DotAtte(nn.Module): class DotAtte(nn.Module):
def __init__(self, key_size, value_size): def __init__(self, key_size, value_size):
# TODO never test
super(DotAtte, self).__init__() super(DotAtte, self).__init__()
self.key_size = key_size self.key_size = key_size
self.value_size = value_size self.value_size = value_size
@@ -42,10 +43,9 @@ class DotAtte(nn.Module):
output = nn.functional.softmax(output, dim=2) output = nn.functional.softmax(output, dim=2)
return torch.matmul(output, V) return torch.matmul(output, V)



class MultiHeadAtte(nn.Module): class MultiHeadAtte(nn.Module):
def __init__(self, input_size, output_size, key_size, value_size, num_atte): def __init__(self, input_size, output_size, key_size, value_size, num_atte):
raise NotImplementedError
# TODO never test
super(MultiHeadAtte, self).__init__() super(MultiHeadAtte, self).__init__()
self.in_linear = nn.ModuleList() self.in_linear = nn.ModuleList()
for i in range(num_atte * 3): for i in range(num_atte * 3):


+ 10
- 9
fastNLP/modules/aggregator/self_attention.py View File

@@ -7,13 +7,14 @@ from fastNLP.modules.utils import initial_parameter




class SelfAttention(nn.Module): class SelfAttention(nn.Module):
"""
Self Attention Module.
"""Self Attention Module.


Args:
input_size: int, the size for the input vector
dim: int, the width of weight matrix.
num_vec: int, the number of encoded vectors
:param int input_size:
:param int attention_unit:
:param int attention_hops:
:param float drop:
:param str initial_method:
:param bool use_cuda:
""" """


def __init__(self, input_size, attention_unit=350, attention_hops=10, drop=0.5, initial_method=None, def __init__(self, input_size, attention_unit=350, attention_hops=10, drop=0.5, initial_method=None,
@@ -48,7 +49,7 @@ class SelfAttention(nn.Module):
def forward(self, input, input_origin): def forward(self, input, input_origin):
""" """
:param input: the matrix to do attention. [baz, senLen, h_dim] :param input: the matrix to do attention. [baz, senLen, h_dim]
:param inp: then token index include pad token( 0 ) [baz , senLen]
:param input_origin: the token indices, including the pad token (0) [baz, senLen]
:return output1: the input matrix after attention operation [baz, multi-head , h_dim] :return output1: the input matrix after attention operation [baz, multi-head , h_dim]
:return output2: the attention penalty term, a scalar [1] :return output2: the attention penalty term, a scalar [1]
""" """
@@ -59,8 +60,8 @@ class SelfAttention(nn.Module):
input_origin = input_origin.transpose(0, 1).contiguous() # [baz, hops,len] input_origin = input_origin.transpose(0, 1).contiguous() # [baz, hops,len]


y1 = self.tanh(self.ws1(self.drop(input))) # [baz,len,dim] -->[bsz,len, attention-unit] y1 = self.tanh(self.ws1(self.drop(input))) # [baz,len,dim] -->[bsz,len, attention-unit]
attention = self.ws2(y1).transpose(1,
2).contiguous() # [bsz,len, attention-unit]--> [bsz, len, hop]--> [baz,hop,len]
attention = self.ws2(y1).transpose(1, 2).contiguous()
# [bsz,len, attention-unit]--> [bsz, len, hop]--> [baz,hop,len]


attention = attention + (-999999 * (input_origin == 0).float()) # remove the weight on padding token. attention = attention + (-999999 * (input_origin == 0).float()) # remove the weight on padding token.
attention = F.softmax(attention, 2) # [baz ,hop, len] attention = F.softmax(attention, 2) # [baz ,hop, len]


+ 16
- 15
fastNLP/modules/decoder/CRF.py View File

@@ -19,13 +19,14 @@ def seq_len_to_byte_mask(seq_lens):
mask = broadcast_arange.float().lt(seq_lens.float().view(-1, 1)) mask = broadcast_arange.float().lt(seq_lens.float().view(-1, 1))
return mask return mask



def allowed_transitions(id2label, encoding_type='bio'): def allowed_transitions(id2label, encoding_type='bio'):
""" """


:param id2label: dict, key是label的indices,value是str类型的tag或tag-label。value可以是只有tag的, 比如"B", "M"; 也可以是
:param dict id2label: key是label的indices,value是str类型的tag或tag-label。value可以是只有tag的, 比如"B", "M"; 也可以是
"B-NN", "M-NN", tag和label之间一定要用"-"隔开。一般可以通过Vocabulary.get_id2word()id2label。 "B-NN", "M-NN", tag和label之间一定要用"-"隔开。一般可以通过Vocabulary.get_id2word()id2label。
:param encoding_type: str, 支持"bio", "bmes"。 :param encoding_type: str, 支持"bio", "bmes"。
:return:List[Tuple(int, int)]], 内部的Tuple是(from_tag_id, to_tag_id)。 返回的结果考虑了start和end,比如"BIO"中,B、O可以
:return: List[Tuple(int, int)]], 内部的Tuple是(from_tag_id, to_tag_id)。 返回的结果考虑了start和end,比如"BIO"中,B、O可以
位于序列的开端,而I不行。所以返回的结果中会包含(start_idx, B_idx), (start_idx, O_idx), 但是不包含(start_idx, I_idx). 位于序列的开端,而I不行。所以返回的结果中会包含(start_idx, B_idx), (start_idx, O_idx), 但是不包含(start_idx, I_idx).
start_idx=len(id2label), end_idx=len(id2label)+1。 start_idx=len(id2label), end_idx=len(id2label)+1。
""" """
@@ -57,6 +58,7 @@ def allowed_transitions(id2label, encoding_type='bio'):
allowed_trans.append((from_id, to_id)) allowed_trans.append((from_id, to_id))
return allowed_trans return allowed_trans



def is_transition_allowed(encoding_type, from_tag, from_label, to_tag, to_label): def is_transition_allowed(encoding_type, from_tag, from_label, to_tag, to_label):
""" """


@@ -130,16 +132,16 @@ def is_transition_allowed(encoding_type, from_tag, from_label, to_tag, to_label)




class ConditionalRandomField(nn.Module): class ConditionalRandomField(nn.Module):
def __init__(self, num_tags, include_start_end_trans=False, allowed_transitions=None, initial_method=None):
"""
"""


:param num_tags: int, 标签的数量。
:param include_start_end_trans: bool, 是否包含起始tag
:param allowed_transitions: List[Tuple[from_tag_id(int), to_tag_id(int)]]. 允许的跃迁,可以通过allowed_transitions()得到。
如果为None,则所有跃迁均为合法
:param initial_method:
"""
:param int num_tags: 标签的数量。
:param bool include_start_end_trans: 是否包含起始tag
:param list allowed_transitions: ``List[Tuple[from_tag_id(int), to_tag_id(int)]]``. 允许的跃迁,可以通过allowed_transitions()得到。
如果为None,则所有跃迁均为合法
:param str initial_method:
"""


def __init__(self, num_tags, include_start_end_trans=False, allowed_transitions=None, initial_method=None):
super(ConditionalRandomField, self).__init__() super(ConditionalRandomField, self).__init__()


self.include_start_end_trans = include_start_end_trans self.include_start_end_trans = include_start_end_trans
@@ -161,7 +163,6 @@ class ConditionalRandomField(nn.Module):


# self.reset_parameter() # self.reset_parameter()
initial_parameter(self, initial_method) initial_parameter(self, initial_method)

def reset_parameter(self): def reset_parameter(self):
nn.init.xavier_normal_(self.trans_m) nn.init.xavier_normal_(self.trans_m)
if self.include_start_end_trans: if self.include_start_end_trans:
@@ -169,9 +170,9 @@ class ConditionalRandomField(nn.Module):
nn.init.normal_(self.end_scores) nn.init.normal_(self.end_scores)


def _normalizer_likelihood(self, logits, mask): def _normalizer_likelihood(self, logits, mask):
"""
Computes the (batch_size,) denominator term for the log-likelihood, which is the
"""Computes the (batch_size,) denominator term for the log-likelihood, which is the
sum of the likelihoods across all possible state sequences. sum of the likelihoods across all possible state sequences.

:param logits:FloatTensor, max_len x batch_size x num_tags :param logits:FloatTensor, max_len x batch_size x num_tags
:param mask:ByteTensor, max_len x batch_size :param mask:ByteTensor, max_len x batch_size
:return:FloatTensor, batch_size :return:FloatTensor, batch_size
@@ -236,8 +237,8 @@ class ConditionalRandomField(nn.Module):
return all_path_score - gold_path_score return all_path_score - gold_path_score


def viterbi_decode(self, data, mask, get_score=False, unpad=False): def viterbi_decode(self, data, mask, get_score=False, unpad=False):
"""
Given a feats matrix, return best decode path and best score.
"""Given a feats matrix, return best decode path and best score.
:param data:FloatTensor, batch_size x max_len x num_tags :param data:FloatTensor, batch_size x max_len x num_tags
:param mask:ByteTensor batch_size x max_len :param mask:ByteTensor batch_size x max_len
:param get_score: bool, whether to output the decode score. :param get_score: bool, whether to output the decode score.


+ 11
- 9
fastNLP/modules/decoder/MLP.py View File

@@ -1,21 +1,23 @@
import torch import torch
import torch.nn as nn import torch.nn as nn

from fastNLP.modules.utils import initial_parameter from fastNLP.modules.utils import initial_parameter




class MLP(nn.Module): class MLP(nn.Module):
def __init__(self, size_layer, activation='relu', initial_method=None, dropout=0.0):
"""Multilayer Perceptrons as a decoder
"""Multilayer Perceptrons as a decoder


:param size_layer: list of int, define the size of MLP layers.
:param activation: str or function, the activation function for hidden layers.
:param initial_method: str, the name of init method.
:param dropout: float, the probability of dropout.
:param list size_layer: list of int, define the size of MLP layers.
:param str activation: str or function, the activation function for hidden layers.
:param str initial_method: the name of initialization method.
:param float dropout: the probability of dropout.


.. note::
There is no activation function applying on output layer.
.. note::
There is no activation function applying on output layer.


"""
"""

def __init__(self, size_layer, activation='relu', initial_method=None, dropout=0.0):
super(MLP, self).__init__() super(MLP, self).__init__()
self.hiddens = nn.ModuleList() self.hiddens = nn.ModuleList()
self.output = None self.output = None


+ 2
- 2
fastNLP/modules/dropout.py View File

@@ -2,8 +2,8 @@ import torch




class TimestepDropout(torch.nn.Dropout): class TimestepDropout(torch.nn.Dropout):
"""This module accepts a `[batch_size, num_timesteps, embedding_dim)]` and use a single
dropout mask of shape `(batch_size, embedding_dim)` to apply on every time step.
"""This module accepts a ``[batch_size, num_timesteps, embedding_dim]`` tensor and uses a single
dropout mask of shape ``(batch_size, embedding_dim)`` to apply on every time step.
""" """


def forward(self, x): def forward(self, x):


+ 16
- 18
fastNLP/modules/encoder/char_embedding.py View File

@@ -1,5 +1,4 @@
import torch import torch
import torch.nn.functional as F
from torch import nn from torch import nn


from fastNLP.modules.utils import initial_parameter from fastNLP.modules.utils import initial_parameter
@@ -7,17 +6,17 @@ from fastNLP.modules.utils import initial_parameter


# from torch.nn.init import xavier_uniform # from torch.nn.init import xavier_uniform
class ConvCharEmbedding(nn.Module): class ConvCharEmbedding(nn.Module):
"""Character-level Embedding with CNN.

:param int char_emb_size: the size of character level embedding. Default: 50
say 26 characters, each embedded to 50 dim vector, then the input_size is 50.
:param tuple feature_maps: tuple of int. The length of the tuple is the number of convolution operations
over characters. The i-th integer is the number of filters (dim of out channels) for the i-th
convolution.
:param tuple kernels: tuple of int. The width of each kernel.
"""


def __init__(self, char_emb_size=50, feature_maps=(40, 30, 30), kernels=(3, 4, 5), initial_method=None): def __init__(self, char_emb_size=50, feature_maps=(40, 30, 30), kernels=(3, 4, 5), initial_method=None):
"""
Character Level Word Embedding
:param char_emb_size: the size of character level embedding. Default: 50
say 26 characters, each embedded to 50 dim vector, then the input_size is 50.
:param feature_maps: tuple of int. The length of the tuple is the number of convolution operations
over characters. The i-th integer is the number of filters (dim of out channels) for the i-th
convolution.
:param kernels: tuple of int. The width of each kernel.
"""
super(ConvCharEmbedding, self).__init__() super(ConvCharEmbedding, self).__init__()
self.convs = nn.ModuleList([ self.convs = nn.ModuleList([
nn.Conv2d(1, feature_maps[i], kernel_size=(char_emb_size, kernels[i]), bias=True, padding=(0, 4)) nn.Conv2d(1, feature_maps[i], kernel_size=(char_emb_size, kernels[i]), bias=True, padding=(0, 4))
@@ -27,8 +26,8 @@ class ConvCharEmbedding(nn.Module):


def forward(self, x): def forward(self, x):
""" """
:param x: [batch_size * sent_length, word_length, char_emb_size]
:return: [batch_size * sent_length, sum(feature_maps), 1]
:param x: ``[batch_size * sent_length, word_length, char_emb_size]``
:return: feature map of shape [batch_size * sent_length, sum(feature_maps), 1]
""" """
x = x.contiguous().view(x.size(0), 1, x.size(1), x.size(2)) x = x.contiguous().view(x.size(0), 1, x.size(1), x.size(2))
# [batch_size*sent_length, channel, width, height] # [batch_size*sent_length, channel, width, height]
@@ -51,13 +50,12 @@ class ConvCharEmbedding(nn.Module):




class LSTMCharEmbedding(nn.Module): class LSTMCharEmbedding(nn.Module):
"""
Character Level Word Embedding with LSTM with a single layer.
:param char_emb_size: int, the size of character level embedding. Default: 50
"""Character-level Embedding with LSTM.
:param int char_emb_size: the size of character level embedding. Default: 50
say 26 characters, each embedded to 50 dim vector, then the input_size is 50. say 26 characters, each embedded to 50 dim vector, then the input_size is 50.
:param hidden_size: int, the number of hidden units. Default: equal to char_emb_size.
:param int hidden_size: the number of hidden units. Default: equal to char_emb_size.
""" """

def __init__(self, char_emb_size=50, hidden_size=None, initial_method=None): def __init__(self, char_emb_size=50, hidden_size=None, initial_method=None):
super(LSTMCharEmbedding, self).__init__() super(LSTMCharEmbedding, self).__init__()
self.hidden_size = char_emb_size if hidden_size is None else hidden_size self.hidden_size = char_emb_size if hidden_size is None else hidden_size
@@ -71,7 +69,7 @@ class LSTMCharEmbedding(nn.Module):


def forward(self, x): def forward(self, x):
""" """
:param x:[ n_batch*n_word, word_length, char_emb_size]
:param x: ``[ n_batch*n_word, word_length, char_emb_size]``
:return: [ n_batch*n_word, char_emb_size] :return: [ n_batch*n_word, char_emb_size]
""" """
batch_size = x.shape[0] batch_size = x.shape[0]


+ 17
- 7
fastNLP/modules/encoder/conv.py View File

@@ -3,20 +3,30 @@


import torch import torch
import torch.nn as nn import torch.nn as nn
from torch.nn.init import xavier_uniform_
# import torch.nn.functional as F


from fastNLP.modules.utils import initial_parameter from fastNLP.modules.utils import initial_parameter



# import torch.nn.functional as F


class Conv(nn.Module): class Conv(nn.Module):
"""
Basic 1-d convolution module.
initialize with xavier_uniform
"""
"""Basic 1-d convolution module, initialized with xavier_uniform.


:param int in_channels:
:param int out_channels:
:param tuple kernel_size:
:param int stride:
:param int padding:
:param int dilation:
:param int groups:
:param bool bias:
:param str activation:
:param str initial_method:
"""
def __init__(self, in_channels, out_channels, kernel_size, def __init__(self, in_channels, out_channels, kernel_size,
stride=1, padding=0, dilation=1, stride=1, padding=0, dilation=1,
groups=1, bias=True, activation='relu',initial_method = None ):
groups=1, bias=True, activation='relu', initial_method=None):
super(Conv, self).__init__() super(Conv, self).__init__()
self.conv = nn.Conv1d( self.conv = nn.Conv1d(
in_channels=in_channels, in_channels=in_channels,


+ 15
- 5
fastNLP/modules/encoder/conv_maxpool.py View File

@@ -4,17 +4,27 @@
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from torch.nn.init import xavier_uniform_
from fastNLP.modules.utils import initial_parameter from fastNLP.modules.utils import initial_parameter



class ConvMaxpool(nn.Module): class ConvMaxpool(nn.Module):
"""
Convolution and max-pooling module with multiple kernel sizes.
"""
"""Convolution and max-pooling module with multiple kernel sizes.


:param int in_channels:
:param int out_channels:
:param tuple kernel_sizes:
:param int stride:
:param int padding:
:param int dilation:
:param int groups:
:param bool bias:
:param str activation:
:param str initial_method:
"""
def __init__(self, in_channels, out_channels, kernel_sizes, def __init__(self, in_channels, out_channels, kernel_sizes,
stride=1, padding=0, dilation=1, stride=1, padding=0, dilation=1,
groups=1, bias=True, activation='relu',initial_method = None ):
groups=1, bias=True, activation="relu", initial_method=None):
super(ConvMaxpool, self).__init__() super(ConvMaxpool, self).__init__()


# convolution # convolution


+ 6
- 9
fastNLP/modules/encoder/embedding.py View File

@@ -2,16 +2,13 @@ import torch.nn as nn




class Embedding(nn.Module): class Embedding(nn.Module):
"""
A simple lookup table
Args:
nums : the size of the lookup table
dims : the size of each vector
padding_idx : pads the tensor with zeros whenever it encounters this index
sparse : If True, gradient matrix will be a sparse tensor. In this case,
only optim.SGD(cuda and cpu) and optim.Adagrad(cpu) can be used
"""
"""A simple lookup table.


:param int nums: the size of the lookup table
:param int dims: the size of each vector
:param int padding_idx: pads the tensor with zeros whenever it encounters this index
:param bool sparse: If True, gradient matrix will be a sparse tensor. In this case, only optim.SGD(cuda and cpu) and optim.Adagrad(cpu) can be used
"""
def __init__(self, nums, dims, padding_idx=0, sparse=False, init_emb=None, dropout=0.0): def __init__(self, nums, dims, padding_idx=0, sparse=False, init_emb=None, dropout=0.0):
super(Embedding, self).__init__() super(Embedding, self).__init__()
self.embed = nn.Embedding(nums, dims, padding_idx, sparse=sparse) self.embed = nn.Embedding(nums, dims, padding_idx, sparse=sparse)


+ 5
- 8
fastNLP/modules/encoder/linear.py View File

@@ -5,15 +5,12 @@ from fastNLP.modules.utils import initial_parameter


class Linear(nn.Module): class Linear(nn.Module):
""" """
Linear module
Args:
input_size : input size
hidden_size : hidden size
num_layers : number of hidden layers
dropout : dropout rate
bidirectional : If True, becomes a bidirectional RNN
"""


:param int input_size: input size
:param int output_size: output size
:param bool bias:
:param str initial_method:
"""
def __init__(self, input_size, output_size, bias=True, initial_method=None): def __init__(self, input_size, output_size, bias=True, initial_method=None):
super(Linear, self).__init__() super(Linear, self).__init__()
self.linear = nn.Linear(input_size, output_size, bias) self.linear = nn.Linear(input_size, output_size, bias)


+ 9
- 7
fastNLP/modules/encoder/lstm.py View File

@@ -6,14 +6,16 @@ from fastNLP.modules.utils import initial_parameter
class LSTM(nn.Module): class LSTM(nn.Module):
"""Long Short Term Memory """Long Short Term Memory


Args:
input_size : input size
hidden_size : hidden size
num_layers : number of hidden layers. Default: 1
dropout : dropout rate. Default: 0.5
bidirectional : If True, becomes a bidirectional RNN. Default: False.
:param int input_size:
:param int hidden_size:
:param int num_layers:
:param float dropout:
:param bool batch_first:
:param bool bidirectional:
:param bool bias:
:param str initial_method:
:param bool get_hidden:
""" """

def __init__(self, input_size, hidden_size=100, num_layers=1, dropout=0.0, batch_first=True, def __init__(self, input_size, hidden_size=100, num_layers=1, dropout=0.0, batch_first=True,
bidirectional=False, bias=True, initial_method=None, get_hidden=False): bidirectional=False, bias=True, initial_method=None, get_hidden=False):
super(LSTM, self).__init__() super(LSTM, self).__init__()


+ 59
- 57
fastNLP/modules/encoder/masked_rnn.py View File

@@ -5,6 +5,8 @@ import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F


from fastNLP.modules.utils import initial_parameter from fastNLP.modules.utils import initial_parameter


def MaskedRecurrent(reverse=False): def MaskedRecurrent(reverse=False):
def forward(input, hidden, cell, mask, train=True, dropout=0): def forward(input, hidden, cell, mask, train=True, dropout=0):
""" """
@@ -254,16 +256,16 @@ class MaskedRNNBase(nn.Module):
return output, hidden return output, hidden


def step(self, input, hx=None, mask=None): def step(self, input, hx=None, mask=None):
'''
execute one step forward (only for one-directional RNN).
Args:
input (batch, input_size): input tensor of this step.
hx (num_layers, batch, hidden_size): the hidden state of last step.
mask (batch): the mask tensor of this step.
Returns:
output (batch, hidden_size): tensor containing the output of this step from the last layer of RNN.
hn (num_layers, batch, hidden_size): tensor containing the hidden state of this step
'''
"""Execute one step forward (only for one-directional RNN).
:param Tensor input: input tensor of this step. (batch, input_size)
:param Tensor hx: the hidden state of last step. (num_layers, batch, hidden_size)
:param Tensor mask: the mask tensor of this step. (batch, )
:returns:
**output** (batch, hidden_size), tensor containing the output of this step from the last layer of RNN.
**hn** (num_layers, batch, hidden_size), tensor containing the hidden state of this step
"""
assert not self.bidirectional, "step only cannot be applied to bidirectional RNN." # aha, typo! assert not self.bidirectional, "step only cannot be applied to bidirectional RNN." # aha, typo!
batch_size = input.size(0) batch_size = input.size(0)
lstm = self.Cell is nn.LSTMCell lstm = self.Cell is nn.LSTMCell
@@ -285,25 +287,23 @@ class MaskedRNN(MaskedRNNBase):
r"""Applies a multi-layer Elman RNN with customized non-linearity to an r"""Applies a multi-layer Elman RNN with customized non-linearity to an
input sequence. input sequence.
For each element in the input sequence, each layer computes the following For each element in the input sequence, each layer computes the following
function:
.. math::
h_t = \tanh(w_{ih} * x_t + b_{ih} + w_{hh} * h_{(t-1)} + b_{hh})
function. :math:`h_t = \tanh(w_{ih} * x_t + b_{ih} + w_{hh} * h_{(t-1)} + b_{hh})`

where :math:`h_t` is the hidden state at time `t`, and :math:`x_t` is where :math:`h_t` is the hidden state at time `t`, and :math:`x_t` is
the hidden state of the previous layer at time `t` or :math:`input_t` the hidden state of the previous layer at time `t` or :math:`input_t`
for the first layer. If nonlinearity='relu', then `ReLU` is used instead for the first layer. If nonlinearity='relu', then `ReLU` is used instead
of `tanh`. of `tanh`.
Args:
input_size: The number of expected features in the input x
hidden_size: The number of features in the hidden state h
num_layers: Number of recurrent layers.
nonlinearity: The non-linearity to use ['tanh'|'relu']. Default: 'tanh'
bias: If False, then the layer does not use bias weights b_ih and b_hh.
Default: True
batch_first: If True, then the input and output tensors are provided
as (batch, seq, feature)
dropout: If non-zero, introduces a dropout layer on the outputs of each
RNN layer except the last layer
bidirectional: If True, becomes a bidirectional RNN. Default: False


:param int input_size: The number of expected features in the input x
:param int hidden_size: The number of features in the hidden state h
:param int num_layers: Number of recurrent layers.
:param str nonlinearity: The non-linearity to use ['tanh'|'relu']. Default: 'tanh'
:param bool bias: If False, then the layer does not use bias weights b_ih and b_hh. Default: True
:param bool batch_first: If True, then the input and output tensors are provided as (batch, seq, feature)
:param float dropout: If non-zero, introduces a dropout layer on the outputs of each RNN layer except the last layer
:param bool bidirectional: If True, becomes a bidirectional RNN. Default: False

Inputs: input, mask, h_0 Inputs: input, mask, h_0
- **input** (seq_len, batch, input_size): tensor containing the features - **input** (seq_len, batch, input_size): tensor containing the features
of the input sequence. of the input sequence.
@@ -327,32 +327,33 @@ class MaskedLSTM(MaskedRNNBase):
r"""Applies a multi-layer long short-term memory (LSTM) RNN to an input r"""Applies a multi-layer long short-term memory (LSTM) RNN to an input
sequence. sequence.
For each element in the input sequence, each layer computes the following For each element in the input sequence, each layer computes the following
function:
function.

.. math:: .. math::
\begin{array}{ll}
i_t = \mathrm{sigmoid}(W_{ii} x_t + b_{ii} + W_{hi} h_{(t-1)} + b_{hi}) \\
f_t = \mathrm{sigmoid}(W_{if} x_t + b_{if} + W_{hf} h_{(t-1)} + b_{hf}) \\
g_t = \tanh(W_{ig} x_t + b_{ig} + W_{hc} h_{(t-1)} + b_{hg}) \\
o_t = \mathrm{sigmoid}(W_{io} x_t + b_{io} + W_{ho} h_{(t-1)} + b_{ho}) \\
c_t = f_t * c_{(t-1)} + i_t * g_t \\
h_t = o_t * \tanh(c_t)
\end{array}

\begin{array}{ll}
i_t = \mathrm{sigmoid}(W_{ii} x_t + b_{ii} + W_{hi} h_{(t-1)} + b_{hi}) \\
f_t = \mathrm{sigmoid}(W_{if} x_t + b_{if} + W_{hf} h_{(t-1)} + b_{hf}) \\
g_t = \tanh(W_{ig} x_t + b_{ig} + W_{hg} h_{(t-1)} + b_{hg}) \\
o_t = \mathrm{sigmoid}(W_{io} x_t + b_{io} + W_{ho} h_{(t-1)} + b_{ho}) \\
c_t = f_t * c_{(t-1)} + i_t * g_t \\
h_t = o_t * \tanh(c_t)
\end{array}

where :math:`h_t` is the hidden state at time `t`, :math:`c_t` is the cell where :math:`h_t` is the hidden state at time `t`, :math:`c_t` is the cell
state at time `t`, :math:`x_t` is the hidden state of the previous layer at state at time `t`, :math:`x_t` is the hidden state of the previous layer at
time `t` or :math:`input_t` for the first layer, and :math:`i_t`, time `t` or :math:`input_t` for the first layer, and :math:`i_t`,
:math:`f_t`, :math:`g_t`, :math:`o_t` are the input, forget, cell, :math:`f_t`, :math:`g_t`, :math:`o_t` are the input, forget, cell,
and out gates, respectively. and out gates, respectively.
Args:
input_size: The number of expected features in the input x
hidden_size: The number of features in the hidden state h
num_layers: Number of recurrent layers.
bias: If False, then the layer does not use bias weights b_ih and b_hh.
Default: True
batch_first: If True, then the input and output tensors are provided
as (batch, seq, feature)
dropout: If non-zero, introduces a dropout layer on the outputs of each
RNN layer except the last layer
bidirectional: If True, becomes a bidirectional RNN. Default: False

:param int input_size: The number of expected features in the input x
:param int hidden_size: The number of features in the hidden state h
:param int num_layers: Number of recurrent layers.
:param bool bias: If False, then the layer does not use bias weights b_ih and b_hh. Default: True
:param bool batch_first: If True, then the input and output tensors are provided as (batch, seq, feature)
:param float dropout: If non-zero, introduces a dropout layer on the outputs of each RNN layer except the last layer
:param bool bidirectional: If True, becomes a bidirectional RNN. Default: False

Inputs: input, mask, (h_0, c_0) Inputs: input, mask, (h_0, c_0)
- **input** (seq_len, batch, input_size): tensor containing the features - **input** (seq_len, batch, input_size): tensor containing the features
of the input sequence. of the input sequence.
@@ -380,29 +381,30 @@ class MaskedGRU(MaskedRNNBase):
r"""Applies a multi-layer gated recurrent unit (GRU) RNN to an input sequence. r"""Applies a multi-layer gated recurrent unit (GRU) RNN to an input sequence.
For each element in the input sequence, each layer computes the following For each element in the input sequence, each layer computes the following
function: function:

.. math:: .. math::

\begin{array}{ll} \begin{array}{ll}
r_t = \mathrm{sigmoid}(W_{ir} x_t + b_{ir} + W_{hr} h_{(t-1)} + b_{hr}) \\ r_t = \mathrm{sigmoid}(W_{ir} x_t + b_{ir} + W_{hr} h_{(t-1)} + b_{hr}) \\
z_t = \mathrm{sigmoid}(W_{iz} x_t + b_{iz} + W_{hz} h_{(t-1)} + b_{hz}) \\ z_t = \mathrm{sigmoid}(W_{iz} x_t + b_{iz} + W_{hz} h_{(t-1)} + b_{hz}) \\
n_t = \tanh(W_{in} x_t + b_{in} + r_t * (W_{hn} h_{(t-1)}+ b_{hn})) \\ n_t = \tanh(W_{in} x_t + b_{in} + r_t * (W_{hn} h_{(t-1)}+ b_{hn})) \\
h_t = (1 - z_t) * n_t + z_t * h_{(t-1)} \\ h_t = (1 - z_t) * n_t + z_t * h_{(t-1)} \\
\end{array} \end{array}

where :math:`h_t` is the hidden state at time `t`, :math:`x_t` is the hidden where :math:`h_t` is the hidden state at time `t`, :math:`x_t` is the hidden
state of the previous layer at time `t` or :math:`input_t` for the first state of the previous layer at time `t` or :math:`input_t` for the first
layer, and :math:`r_t`, :math:`z_t`, :math:`n_t` are the reset, input, layer, and :math:`r_t`, :math:`z_t`, :math:`n_t` are the reset, input,
and new gates, respectively. and new gates, respectively.
Args:
input_size: The number of expected features in the input x
hidden_size: The number of features in the hidden state h
num_layers: Number of recurrent layers.
nonlinearity: The non-linearity to use ['tanh'|'relu']. Default: 'tanh'
bias: If False, then the layer does not use bias weights b_ih and b_hh.
Default: True
batch_first: If True, then the input and output tensors are provided
as (batch, seq, feature)
dropout: If non-zero, introduces a dropout layer on the outputs of each
RNN layer except the last layer
bidirectional: If True, becomes a bidirectional RNN. Default: False

:param int input_size: The number of expected features in the input x
:param int hidden_size: The number of features in the hidden state h
:param int num_layers: Number of recurrent layers.
:param str nonlinearity: The non-linearity to use ['tanh'|'relu']. Default: 'tanh'
:param bool bias: If False, then the layer does not use bias weights b_ih and b_hh. Default: True
:param bool batch_first: If True, then the input and output tensors are provided as (batch, seq, feature)
:param float dropout: If non-zero, introduces a dropout layer on the outputs of each RNN layer except the last layer
:param bool bidirectional: If True, becomes a bidirectional RNN. Default: False

Inputs: input, mask, h_0 Inputs: input, mask, h_0
- **input** (seq_len, batch, input_size): tensor containing the features - **input** (seq_len, batch, input_size): tensor containing the features
of the input sequence. of the input sequence.


+ 3
- 6
fastNLP/modules/encoder/transformer.py View File

@@ -1,10 +1,9 @@
import torch
from torch import nn from torch import nn
import torch.nn.functional as F


from ..aggregator.attention import MultiHeadAtte from ..aggregator.attention import MultiHeadAtte
from ..other_modules import LayerNormalization from ..other_modules import LayerNormalization



class TransformerEncoder(nn.Module): class TransformerEncoder(nn.Module):
class SubLayer(nn.Module): class SubLayer(nn.Module):
def __init__(self, input_size, output_size, key_size, value_size, num_atte): def __init__(self, input_size, output_size, key_size, value_size, num_atte):
@@ -12,8 +11,8 @@ class TransformerEncoder(nn.Module):
self.atte = MultiHeadAtte(input_size, output_size, key_size, value_size, num_atte) self.atte = MultiHeadAtte(input_size, output_size, key_size, value_size, num_atte)
self.norm1 = LayerNormalization(output_size) self.norm1 = LayerNormalization(output_size)
self.ffn = nn.Sequential(nn.Linear(output_size, output_size), self.ffn = nn.Sequential(nn.Linear(output_size, output_size),
nn.ReLU(),
nn.Linear(output_size, output_size))
nn.ReLU(),
nn.Linear(output_size, output_size))
self.norm2 = LayerNormalization(output_size) self.norm2 = LayerNormalization(output_size)


def forward(self, input, seq_mask): def forward(self, input, seq_mask):
@@ -28,5 +27,3 @@ class TransformerEncoder(nn.Module):


def forward(self, x, seq_mask=None): def forward(self, x, seq_mask=None):
return self.layers(x, seq_mask) return self.layers(x, seq_mask)



+ 9
- 3
fastNLP/modules/encoder/variational_rnn.py View File

@@ -1,5 +1,3 @@
import math

import torch import torch
import torch.nn as nn import torch.nn as nn
from torch.nn.utils.rnn import PackedSequence, pack_padded_sequence, pad_packed_sequence from torch.nn.utils.rnn import PackedSequence, pack_padded_sequence, pad_packed_sequence
@@ -8,15 +6,17 @@ from fastNLP.modules.utils import initial_parameter
try: try:
from torch import flip from torch import flip
except ImportError: except ImportError:
def flip(x, dims):
def flip(x, dims):
indices = [slice(None)] * x.dim() indices = [slice(None)] * x.dim()
for dim in dims: for dim in dims:
indices[dim] = torch.arange(x.size(dim) - 1, -1, -1, dtype=torch.long, device=x.device) indices[dim] = torch.arange(x.size(dim) - 1, -1, -1, dtype=torch.long, device=x.device)
return x[tuple(indices)] return x[tuple(indices)]



class VarRnnCellWrapper(nn.Module): class VarRnnCellWrapper(nn.Module):
"""Wrapper for normal RNN Cells, make it support variational dropout """Wrapper for normal RNN Cells, make it support variational dropout
""" """

def __init__(self, cell, hidden_size, input_p, hidden_p): def __init__(self, cell, hidden_size, input_p, hidden_p):
super(VarRnnCellWrapper, self).__init__() super(VarRnnCellWrapper, self).__init__()
self.cell = cell self.cell = cell
@@ -88,6 +88,7 @@ class VarRNNBase(nn.Module):
refer to `A Theoretically Grounded Application of Dropout in Recurrent Neural Networks (Yarin Gal and Zoubin Ghahramani, 2016) refer to `A Theoretically Grounded Application of Dropout in Recurrent Neural Networks (Yarin Gal and Zoubin Ghahramani, 2016)
https://arxiv.org/abs/1512.05287`. https://arxiv.org/abs/1512.05287`.
""" """

def __init__(self, mode, Cell, input_size, hidden_size, num_layers=1, def __init__(self, mode, Cell, input_size, hidden_size, num_layers=1,
bias=True, batch_first=False, bias=True, batch_first=False,
input_dropout=0, hidden_dropout=0, bidirectional=False): input_dropout=0, hidden_dropout=0, bidirectional=False):
@@ -177,18 +178,23 @@ class VarRNNBase(nn.Module):
class VarLSTM(VarRNNBase): class VarLSTM(VarRNNBase):
"""Variational Dropout LSTM. """Variational Dropout LSTM.
""" """

def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super(VarLSTM, self).__init__(mode="LSTM", Cell=nn.LSTMCell, *args, **kwargs) super(VarLSTM, self).__init__(mode="LSTM", Cell=nn.LSTMCell, *args, **kwargs)



class VarRNN(VarRNNBase): class VarRNN(VarRNNBase):
"""Variational Dropout RNN. """Variational Dropout RNN.
""" """

def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super(VarRNN, self).__init__(mode="RNN", Cell=nn.RNNCell, *args, **kwargs) super(VarRNN, self).__init__(mode="RNN", Cell=nn.RNNCell, *args, **kwargs)



class VarGRU(VarRNNBase): class VarGRU(VarRNNBase):
"""Variational Dropout GRU. """Variational Dropout GRU.
""" """

def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super(VarGRU, self).__init__(mode="GRU", Cell=nn.GRUCell, *args, **kwargs) super(VarGRU, self).__init__(mode="GRU", Cell=nn.GRUCell, *args, **kwargs)




+ 23
- 34
fastNLP/modules/other_modules.py View File

@@ -29,8 +29,11 @@ class GroupNorm(nn.Module):




class LayerNormalization(nn.Module): class LayerNormalization(nn.Module):
""" Layer normalization module """
"""


:param int layer_size:
:param float eps: default=1e-3
"""
def __init__(self, layer_size, eps=1e-3): def __init__(self, layer_size, eps=1e-3):
super(LayerNormalization, self).__init__() super(LayerNormalization, self).__init__()


@@ -52,12 +55,11 @@ class LayerNormalization(nn.Module):
class BiLinear(nn.Module): class BiLinear(nn.Module):
def __init__(self, n_left, n_right, n_out, bias=True): def __init__(self, n_left, n_right, n_out, bias=True):
""" """
Args:
n_left: size of left input
n_right: size of right input
n_out: size of output
bias: If set to False, the layer will not learn an additive bias.
Default: True

:param int n_left: size of left input
:param int n_right: size of right input
:param int n_out: size of output
:param bool bias: If set to False, the layer will not learn an additive bias. Default: True
""" """
super(BiLinear, self).__init__() super(BiLinear, self).__init__()
self.n_left = n_left self.n_left = n_left
@@ -83,12 +85,9 @@ class BiLinear(nn.Module):


def forward(self, input_left, input_right): def forward(self, input_left, input_right):
""" """
Args:
input_left: Tensor
the left input tensor with shape = [batch1, batch2, ..., left_features]
input_right: Tensor
the right input tensor with shape = [batch1, batch2, ..., right_features]
Returns:
:param Tensor input_left: the left input tensor with shape = [batch1, batch2, ..., left_features]
:param Tensor input_right: the right input tensor with shape = [batch1, batch2, ..., right_features]

""" """
left_size = input_left.size() left_size = input_left.size()
right_size = input_right.size() right_size = input_right.size()
@@ -118,16 +117,11 @@ class BiLinear(nn.Module):
class BiAffine(nn.Module): class BiAffine(nn.Module):
def __init__(self, n_enc, n_dec, n_labels, biaffine=True, **kwargs): def __init__(self, n_enc, n_dec, n_labels, biaffine=True, **kwargs):
""" """
Args:
n_enc: int
the dimension of the encoder input.
n_dec: int
the dimension of the decoder input.
n_labels: int
the number of labels of the crf layer
biaffine: bool
if apply bi-affine parameter.
**kwargs:

:param int n_enc: the dimension of the encoder input.
:param int n_dec: the dimension of the decoder input.
:param int n_labels: the number of labels of the crf layer
:param bool biaffine: if apply bi-affine parameter.
""" """
super(BiAffine, self).__init__() super(BiAffine, self).__init__()
self.n_enc = n_enc self.n_enc = n_enc
@@ -154,17 +148,12 @@ class BiAffine(nn.Module):


def forward(self, input_d, input_e, mask_d=None, mask_e=None): def forward(self, input_d, input_e, mask_d=None, mask_e=None):
""" """
Args:
input_d: Tensor
the decoder input tensor with shape = [batch, length_decoder, input_size]
input_e: Tensor
the child input tensor with shape = [batch, length_encoder, input_size]
mask_d: Tensor or None
the mask tensor for decoder with shape = [batch, length_decoder]
mask_e: Tensor or None
the mask tensor for encoder with shape = [batch, length_encoder]
Returns: Tensor
the energy tensor with shape = [batch, num_label, length, length]

:param Tensor input_d: the decoder input tensor with shape = [batch, length_decoder, input_size]
:param Tensor input_e: the child input tensor with shape = [batch, length_encoder, input_size]
:param mask_d: Tensor or None, the mask tensor for decoder with shape = [batch, length_decoder]
:param mask_e: Tensor or None, the mask tensor for encoder with shape = [batch, length_encoder]
:returns: Tensor, the energy tensor with shape = [batch, num_label, length, length]
""" """
assert input_d.size(0) == input_e.size(0), 'batch sizes of encoder and decoder are requires to be equal.' assert input_d.size(0) == input_e.size(0), 'batch sizes of encoder and decoder are requires to be equal.'
batch, length_decoder, _ = input_d.size() batch, length_decoder, _ = input_d.size()


+ 2
- 2
fastNLP/modules/utils.py View File

@@ -15,7 +15,7 @@ def initial_parameter(net, initial_method=None):
"""A method used to initialize the weights of PyTorch models. """A method used to initialize the weights of PyTorch models.


:param net: a PyTorch model :param net: a PyTorch model
:param initial_method: str, one of the following initializations
:param str initial_method: one of the following initializations.


- xavier_uniform - xavier_uniform
- xavier_normal (default) - xavier_normal (default)
@@ -79,7 +79,7 @@ def seq_mask(seq_len, max_len):


:param seq_len: list or torch.Tensor, the lengths of sequences in a batch. :param seq_len: list or torch.Tensor, the lengths of sequences in a batch.
:param max_len: int, the maximum sequence length in a batch. :param max_len: int, the maximum sequence length in a batch.
:return mask: torch.LongTensor, [batch_size, max_len]
:return: mask, torch.LongTensor, [batch_size, max_len]


""" """
if not isinstance(seq_len, torch.Tensor): if not isinstance(seq_len, torch.Tensor):


+ 2
- 2
reproduction/chinese_word_segment/models/cws_model.py View File

@@ -65,7 +65,7 @@ class CWSBiLSTMEncoder(BaseModel):


x_tensor = self.char_embedding(chars) x_tensor = self.char_embedding(chars)


if not bigrams is None:
if hasattr(self, 'bigram_embedding'):
bigram_tensor = self.bigram_embedding(bigrams).view(batch_size, max_len, -1) bigram_tensor = self.bigram_embedding(bigrams).view(batch_size, max_len, -1)
x_tensor = torch.cat([x_tensor, bigram_tensor], dim=2) x_tensor = torch.cat([x_tensor, bigram_tensor], dim=2)
x_tensor = self.embedding_drop(x_tensor) x_tensor = self.embedding_drop(x_tensor)
@@ -185,5 +185,5 @@ class CWSBiLSTMCRF(BaseModel):
feats = self.decoder_model(feats) feats = self.decoder_model(feats)
probs = self.crf.viterbi_decode(feats, masks, get_score=False) probs = self.crf.viterbi_decode(feats, masks, get_score=False)


return {'pred': probs}
return {'pred': probs, 'seq_lens':seq_lens}



+ 11
- 2
reproduction/chinese_word_segment/process/cws_processor.py View File

@@ -238,7 +238,7 @@ class VocabIndexerProcessor(Processor):


""" """
def __init__(self, field_name, new_added_filed_name=None, min_freq=1, max_size=None, def __init__(self, field_name, new_added_filed_name=None, min_freq=1, max_size=None,
verbose=1, is_input=True):
verbose=0, is_input=True):
""" """


:param field_name: 从哪个field_name创建词表,以及对哪个field_name进行index操作 :param field_name: 从哪个field_name创建词表,以及对哪个field_name进行index操作
@@ -320,6 +320,15 @@ class VocabIndexerProcessor(Processor):
def get_vocab_size(self): def get_vocab_size(self):
return len(self.vocab) return len(self.vocab)


def set_verbose(self, verbose):
"""
设置processor verbose状态。

:param verbose: int, 0,不输出任何信息;1,输出vocab 信息。
:return:
"""
self.verbose = verbose

class VocabProcessor(Processor): class VocabProcessor(Processor):
def __init__(self, field_name, min_freq=1, max_size=None): def __init__(self, field_name, min_freq=1, max_size=None):


@@ -378,7 +387,7 @@ class BMES2OutputProcessor(Processor):
prediction为BSEMS,会被认为是SSSSS. prediction为BSEMS,会被认为是SSSSS.


""" """
def __init__(self, chars_field_name='chars_list', tag_field_name='pred_tags', new_added_field_name='output',
def __init__(self, chars_field_name='chars_list', tag_field_name='pred', new_added_field_name='output',
b_idx = 0, m_idx = 1, e_idx = 2, s_idx = 3): b_idx = 0, m_idx = 1, e_idx = 2, s_idx = 3):
""" """




+ 14
- 13
reproduction/chinese_word_segment/train_context.py View File

@@ -11,7 +11,6 @@ from reproduction.chinese_word_segment.process.cws_processor import InputTargetP
from reproduction.chinese_word_segment.cws_io.cws_reader import ConllCWSReader from reproduction.chinese_word_segment.cws_io.cws_reader import ConllCWSReader
from reproduction.chinese_word_segment.models.cws_model import CWSBiLSTMCRF from reproduction.chinese_word_segment.models.cws_model import CWSBiLSTMCRF


from reproduction.chinese_word_segment.utils import calculate_pre_rec_f1


ds_name = 'msr' ds_name = 'msr'


@@ -39,8 +38,6 @@ bigram_vocab_proc = VocabIndexerProcessor('bigrams_lst', new_added_filed_name='b


seq_len_proc = SeqLenProcessor('chars') seq_len_proc = SeqLenProcessor('chars')


input_target_proc = InputTargetProcessor(input_fields=['chars', 'bigrams', 'seq_lens', "target"],
target_fields=['target', 'seq_lens'])
# 2. 使用processor # 2. 使用processor
fs2hs_proc(tr_dataset) fs2hs_proc(tr_dataset)


@@ -63,15 +60,15 @@ char_vocab_proc(dev_dataset)
bigram_vocab_proc(dev_dataset) bigram_vocab_proc(dev_dataset)
seq_len_proc(dev_dataset) seq_len_proc(dev_dataset)


input_target_proc(tr_dataset)
input_target_proc(dev_dataset)
dev_dataset.set_input('target')
tr_dataset.set_input('target')



print("Finish preparing data.") print("Finish preparing data.")


# 3. 得到数据集可以用于训练了 # 3. 得到数据集可以用于训练了
# TODO pretrain的embedding是怎么解决的? # TODO pretrain的embedding是怎么解决的?


import torch
from torch import optim from torch import optim




@@ -79,8 +76,8 @@ tag_size = tag_proc.tag_size


cws_model = CWSBiLSTMCRF(char_vocab_proc.get_vocab_size(), embed_dim=100, cws_model = CWSBiLSTMCRF(char_vocab_proc.get_vocab_size(), embed_dim=100,
bigram_vocab_num=bigram_vocab_proc.get_vocab_size(), bigram_vocab_num=bigram_vocab_proc.get_vocab_size(),
bigram_embed_dim=100, num_bigram_per_char=8,
hidden_size=200, bidirectional=True, embed_drop_p=0.2,
bigram_embed_dim=30, num_bigram_per_char=8,
hidden_size=200, bidirectional=True, embed_drop_p=0.3,
num_layers=1, tag_size=tag_size) num_layers=1, tag_size=tag_size)
cws_model.cuda() cws_model.cuda()


@@ -108,7 +105,7 @@ pp.add_processor(bigram_proc)
pp.add_processor(char_vocab_proc) pp.add_processor(char_vocab_proc)
pp.add_processor(bigram_vocab_proc) pp.add_processor(bigram_vocab_proc)
pp.add_processor(seq_len_proc) pp.add_processor(seq_len_proc)
pp.add_processor(input_target_proc)
# pp.add_processor(input_target_proc)


# te_filename = '/hdd/fudanNLP/CWS/CWS_semiCRF/all_data/{}/middle_files/{}_test.txt'.format(ds_name, ds_name) # te_filename = '/hdd/fudanNLP/CWS/CWS_semiCRF/all_data/{}/middle_files/{}_test.txt'.format(ds_name, ds_name)
te_filename = '/home/hyan/ctb3/test.conllx' te_filename = '/home/hyan/ctb3/test.conllx'
@@ -142,14 +139,16 @@ from fastNLP.api.processor import ModelProcessor
from reproduction.chinese_word_segment.process.cws_processor import BMES2OutputProcessor from reproduction.chinese_word_segment.process.cws_processor import BMES2OutputProcessor


model_proc = ModelProcessor(cws_model) model_proc = ModelProcessor(cws_model)
output_proc = BMES2OutputProcessor()
output_proc = BMES2OutputProcessor(chars_field_name='chars_lst', tag_field_name='pred')


pp = Pipeline() pp = Pipeline()
pp.add_processor(fs2hs_proc) pp.add_processor(fs2hs_proc)
# pp.add_processor(sp_proc) # pp.add_processor(sp_proc)
pp.add_processor(char_proc) pp.add_processor(char_proc)
pp.add_processor(bigram_proc) pp.add_processor(bigram_proc)
char_vocab_proc.set_verbose(0)
pp.add_processor(char_vocab_proc) pp.add_processor(char_vocab_proc)
bigram_vocab_proc.set_verbose(0)
pp.add_processor(bigram_vocab_proc) pp.add_processor(bigram_vocab_proc)
pp.add_processor(seq_len_proc) pp.add_processor(seq_len_proc)


@@ -158,9 +157,11 @@ pp.add_processor(output_proc)




# TODO 这里貌似需要区分test pipeline与infer pipeline # TODO 这里貌似需要区分test pipeline与infer pipeline

infer_context_dict = {'pipeline': pp}
# torch.save(infer_context_dict, 'models/cws_crf.pkl')
import torch
import datetime
now = datetime.datetime.now()
infer_context_dict = {'pipeline': pp, 'tag_proc': tag_proc}
torch.save(infer_context_dict, 'models/cws_crf_{}_{}.pkl'.format(now.month, now.day))




# TODO 还需要考虑如何替换回原文的问题? # TODO 还需要考虑如何替换回原文的问题?


+ 1
- 1
test/core/test_dataset.py View File

@@ -197,4 +197,4 @@ class TestDataSetIter(unittest.TestCase):
def test__repr__(self): def test__repr__(self):
ds = DataSet({"x": [[1, 2, 3, 4]] * 10, "y": [[5, 6]] * 10}) ds = DataSet({"x": [[1, 2, 3, 4]] * 10, "y": [[5, 6]] * 10})
for iter in ds: for iter in ds:
self.assertEqual(iter.__repr__(), "{'x': [1, 2, 3, 4],\n'y': [5, 6]}")
self.assertEqual(iter.__repr__(), "{'x': [1, 2, 3, 4] type=list,\n'y': [5, 6] type=list}")

+ 2
- 1
test/core/test_metrics.py View File

@@ -360,7 +360,8 @@ class TestBMESF1PreRecMetric(unittest.TestCase):


metric = BMESF1PreRecMetric() metric = BMESF1PreRecMetric()
metric(pred_dict, target_dict) metric(pred_dict, target_dict)
self.assertDictEqual(metric.get_metric(), {'f1': 0.999999, 'precision': 1.0, 'recall': 1.0})
self.assertDictEqual(metric.get_metric(), {'f': 1.0, 'pre': 1.0, 'rec': 1.0})



class TestUsefulFunctions(unittest.TestCase): class TestUsefulFunctions(unittest.TestCase):
# 测试metrics.py中一些看上去挺有用的函数 # 测试metrics.py中一些看上去挺有用的函数


+ 1190
- 0
tutorials/fastnlp_advanced_tutorial/advance_tutorial.ipynb
File diff suppressed because it is too large
View File


+ 8
- 0
tutorials/fastnlp_advanced_tutorial/data/config View File

@@ -0,0 +1,8 @@
[esim_model]
embed_dim = 300
hidden_size = 300
batch_first = true
dropout = 0.3
num_classes = 3
gpu = true
batch_size = 32

+ 100
- 0
tutorials/fastnlp_advanced_tutorial/hypothesis View File

@@ -0,0 +1,100 @@
A person is training his horse for a competition .
A person is at a diner , ordering an omelette .
A person is outdoors , on a horse .
They are smiling at their parents
There are children present
The kids are frowning
The boy skates down the sidewalk .
The boy does a skateboarding trick .
The boy is wearing safety equipment .
An older man drinks his juice as he waits for his daughter to get off work .
A boy flips a burger .
An elderly man sits in a small shop .
Some women are hugging on vacation .
The women are sleeping .
There are women showing affection .
The people are eating omelettes .
The people are sitting at desks in school .
The diners are at a restaurant .
A man is drinking juice .
Two women are at a restaurant drinking wine .
A man in a restaurant is waiting for his meal to arrive .
A blond man getting a drink of water from a fountain in the park .
A blond man wearing a brown shirt is reading a book on a bench in the park
A blond man drinking water from a fountain .
The friends scowl at each other over a full dinner table .
There are two woman in this picture .
The friends have just met for the first time in 20 years , and have had a great time catching up .
The two sisters saw each other across the crowded diner and shared a hug , both clutching their doggie bags .
Two groups of rival gang members flipped each other off .
Two women hug each other .
A team is trying to score the games winning out .
A team is trying to tag a runner out .
A team is playing baseball on Saturn .
A school hosts a basketball game .
A high school is hosting an event .
A school is hosting an event .
The women do not care what clothes they wear .
Women are waiting by a tram .
The women enjoy having a good fashion sense .
A child with mom and dad , on summer vacation at the beach .
A family of three is at the beach .
A family of three is at the mall shopping .
The people waiting on the train are sitting .
There are people just getting on a train
There are people waiting on a train .
A couple are playing with a young child outside .
A couple are playing frisbee with a young child at the beach .
A couple watch a little girl play by herself on the beach .
The family is sitting down for dinner .
The family is outside .
The family is on vacation .
The people are standing still on the curb .
Near a couple of restaurants , two people walk across the street .
The couple are walking across the street together .
The woman is nake .
The woman is cold .
The woman is wearing green .
The man with the sign is caucasian .
They are protesting outside the capital .
A woman in white .
A man is advertising for a restaurant .
The woman is wearing black .
A man and a woman walk down a crowded city street .
The woman is wearing white .
They are working for John 's Pizza .
Olympic swimming .
A man and a soman are eating together at John 's Pizza and Gyro .
They are walking with a sign .
The woman is waiting for a friend .
The man is sitting down while he has a sign for John 's Pizza and Gyro in his arms .
The woman and man are outdoors .
A woman ordering pizza .
The people are related .
Two adults run across the street to get away from a red shirted person chasing them .
The adults are both male and female .
Two people walk home after a tasty steak dinner .
Two adults swimming in water
Two adults walk across a street .
Two people ride bicycles into a tunnel .
Two people walk away from a restaurant across a street .
Two adults walking across a road near the convicted prisoner dressed in red
Two friends cross a street .
Some people board a train .
Two adults walk across the street .
Two adults walking across a road
There are no women in the picture .
Two adults walk across the street to get away from a red shirted person who is chasing them .
A married couple is sleeping .
A female is next to a man .
A married couple is walking next to each other .
Nobody has food .
A woman eats a banana and walks across a street , and there is a man trailing behind her .
The woman and man are playing baseball together .
two coworkers cross pathes on a street
A woman eats ice cream walking down the sidewalk , and there is another woman in front of her with a purse .
The mans briefcase is for work .
A person eating .
A person that is hungry .
An actress and her favorite assistant talk a walk in the city .
a woman eating a banana crosses a street

+ 100
- 0
tutorials/fastnlp_advanced_tutorial/label View File

@@ -0,0 +1,100 @@
1
2
0
1
0
2
2
0
1
1
2
1
1
2
0
1
2
0
0
2
1
1
2
0
2
0
1
1
2
0
1
0
2
2
1
0
2
0
1
1
0
2
1
0
0
0
1
2
2
0
1
2
0
1
2
1
0
1
2
0
0
2
1
0
1
2
2
0
1
2
0
1
1
2
0
1
2
0
2
0
1
1
2
0
0
2
1
2
0
1
2
0
2
1
2
1
0
1
1
0

+ 100
- 0
tutorials/fastnlp_advanced_tutorial/premise View File

@@ -0,0 +1,100 @@
A person on a horse jumps over a broken down airplane .
A person on a horse jumps over a broken down airplane .
A person on a horse jumps over a broken down airplane .
Children smiling and waving at camera
Children smiling and waving at camera
Children smiling and waving at camera
A boy is jumping on skateboard in the middle of a red bridge .
A boy is jumping on skateboard in the middle of a red bridge .
A boy is jumping on skateboard in the middle of a red bridge .
An older man sits with his orange juice at a small table in a coffee shop while employees in bright colored shirts smile in the background .
An older man sits with his orange juice at a small table in a coffee shop while employees in bright colored shirts smile in the background .
An older man sits with his orange juice at a small table in a coffee shop while employees in bright colored shirts smile in the background .
Two blond women are hugging one another .
Two blond women are hugging one another .
Two blond women are hugging one another .
A few people in a restaurant setting , one of them is drinking orange juice .
A few people in a restaurant setting , one of them is drinking orange juice .
A few people in a restaurant setting , one of them is drinking orange juice .
An older man is drinking orange juice at a restaurant .
An older man is drinking orange juice at a restaurant .
An older man is drinking orange juice at a restaurant .
A man with blond-hair , and a brown shirt drinking out of a public water fountain .
A man with blond-hair , and a brown shirt drinking out of a public water fountain .
A man with blond-hair , and a brown shirt drinking out of a public water fountain .
Two women who just had lunch hugging and saying goodbye .
Two women who just had lunch hugging and saying goodbye .
Two women who just had lunch hugging and saying goodbye .
Two women , holding food carryout containers , hug .
Two women , holding food carryout containers , hug .
Two women , holding food carryout containers , hug .
A Little League team tries to catch a runner sliding into a base in an afternoon game .
A Little League team tries to catch a runner sliding into a base in an afternoon game .
A Little League team tries to catch a runner sliding into a base in an afternoon game .
The school is having a special event in order to show the american culture on how other cultures are dealt with in parties .
The school is having a special event in order to show the american culture on how other cultures are dealt with in parties .
The school is having a special event in order to show the american culture on how other cultures are dealt with in parties .
High fashion ladies wait outside a tram beside a crowd of people in the city .
High fashion ladies wait outside a tram beside a crowd of people in the city .
High fashion ladies wait outside a tram beside a crowd of people in the city .
A man , woman , and child enjoying themselves on a beach .
A man , woman , and child enjoying themselves on a beach .
A man , woman , and child enjoying themselves on a beach .
People waiting to get on a train or just getting off .
People waiting to get on a train or just getting off .
People waiting to get on a train or just getting off .
A couple playing with a little boy on the beach .
A couple playing with a little boy on the beach .
A couple playing with a little boy on the beach .
A couple play in the tide with their young son .
A couple play in the tide with their young son .
A couple play in the tide with their young son .
A man and a woman cross the street in front of a pizza and gyro restaurant .
A man and a woman cross the street in front of a pizza and gyro restaurant .
A man and a woman cross the street in front of a pizza and gyro restaurant .
A woman in a green jacket and hood over her head looking towards a valley .
A woman in a green jacket and hood over her head looking towards a valley .
A woman in a green jacket and hood over her head looking towards a valley .
Woman in white in foreground and a man slightly behind walking with a sign for John 's Pizza and Gyro in the background .
Woman in white in foreground and a man slightly behind walking with a sign for John 's Pizza and Gyro in the background .
Woman in white in foreground and a man slightly behind walking with a sign for John 's Pizza and Gyro in the background .
Woman in white in foreground and a man slightly behind walking with a sign for John 's Pizza and Gyro in the background .
Woman in white in foreground and a man slightly behind walking with a sign for John 's Pizza and Gyro in the background .
Woman in white in foreground and a man slightly behind walking with a sign for John 's Pizza and Gyro in the background .
Woman in white in foreground and a man slightly behind walking with a sign for John 's Pizza and Gyro in the background .
Woman in white in foreground and a man slightly behind walking with a sign for John 's Pizza and Gyro in the background .
Woman in white in foreground and a man slightly behind walking with a sign for John 's Pizza and Gyro in the background .
Woman in white in foreground and a man slightly behind walking with a sign for John 's Pizza and Gyro in the background .
Woman in white in foreground and a man slightly behind walking with a sign for John 's Pizza and Gyro in the background .
Woman in white in foreground and a man slightly behind walking with a sign for John 's Pizza and Gyro in the background .
Woman in white in foreground and a man slightly behind walking with a sign for John 's Pizza and Gyro in the background .
Woman in white in foreground and a man slightly behind walking with a sign for John 's Pizza and Gyro in the background .
Woman in white in foreground and a man slightly behind walking with a sign for John 's Pizza and Gyro in the background .
Two adults , one female in white , with shades and one male , gray clothes , walking across a street , away from a eatery with a blurred image of a dark colored red shirted person in the foreground .
Two adults , one female in white , with shades and one male , gray clothes , walking across a street , away from a eatery with a blurred image of a dark colored red shirted person in the foreground .
Two adults , one female in white , with shades and one male , gray clothes , walking across a street , away from a eatery with a blurred image of a dark colored red shirted person in the foreground .
Two adults , one female in white , with shades and one male , gray clothes , walking across a street , away from a eatery with a blurred image of a dark colored red shirted person in the foreground .
Two adults , one female in white , with shades and one male , gray clothes , walking across a street , away from a eatery with a blurred image of a dark colored red shirted person in the foreground .
Two adults , one female in white , with shades and one male , gray clothes , walking across a street , away from a eatery with a blurred image of a dark colored red shirted person in the foreground .
Two adults , one female in white , with shades and one male , gray clothes , walking across a street , away from a eatery with a blurred image of a dark colored red shirted person in the foreground .
Two adults , one female in white , with shades and one male , gray clothes , walking across a street , away from a eatery with a blurred image of a dark colored red shirted person in the foreground .
Two adults , one female in white , with shades and one male , gray clothes , walking across a street , away from a eatery with a blurred image of a dark colored red shirted person in the foreground .
Two adults , one female in white , with shades and one male , gray clothes , walking across a street , away from a eatery with a blurred image of a dark colored red shirted person in the foreground .
Two adults , one female in white , with shades and one male , gray clothes , walking across a street , away from a eatery with a blurred image of a dark colored red shirted person in the foreground .
Two adults , one female in white , with shades and one male , gray clothes , walking across a street , away from a eatery with a blurred image of a dark colored red shirted person in the foreground .
Two adults , one female in white , with shades and one male , gray clothes , walking across a street , away from a eatery with a blurred image of a dark colored red shirted person in the foreground .
Two adults , one female in white , with shades and one male , gray clothes , walking across a street , away from a eatery with a blurred image of a dark colored red shirted person in the foreground .
Two adults , one female in white , with shades and one male , gray clothes , walking across a street , away from a eatery with a blurred image of a dark colored red shirted person in the foreground .
A woman wearing all white and eating , walks next to a man holding a briefcase .
A woman wearing all white and eating , walks next to a man holding a briefcase .
A woman wearing all white and eating , walks next to a man holding a briefcase .
A woman is walking across the street eating a banana , while a man is following with his briefcase .
A woman is walking across the street eating a banana , while a man is following with his briefcase .
A woman is walking across the street eating a banana , while a man is following with his briefcase .
A woman is walking across the street eating a banana , while a man is following with his briefcase .
A woman is walking across the street eating a banana , while a man is following with his briefcase .
A woman is walking across the street eating a banana , while a man is following with his briefcase .
A woman is walking across the street eating a banana , while a man is following with his briefcase .
A woman is walking across the street eating a banana , while a man is following with his briefcase .
A woman is walking across the street eating a banana , while a man is following with his briefcase .
A woman is walking across the street eating a banana , while a man is following with his briefcase .

+ 77
- 0
tutorials/fastnlp_advanced_tutorial/tutorial_sample_dataset.csv View File

@@ -0,0 +1,77 @@
A series of escapades demonstrating the adage that what is good for the goose is also good for the gander , some of which occasionally amuses but none of which amounts to much of a story . 1
This quiet , introspective and entertaining independent is worth seeking . 4
Even fans of Ismail Merchant 's work , I suspect , would have a hard time sitting through this one . 1
A positively thrilling combination of ethnography and all the intrigue , betrayal , deceit and murder of a Shakespearean tragedy or a juicy soap opera . 3
Aggressive self-glorification and a manipulative whitewash . 1
A comedy-drama of nearly epic proportions rooted in a sincere performance by the title character undergoing midlife crisis . 4
Narratively , Trouble Every Day is a plodding mess . 1
The Importance of Being Earnest , so thick with wit it plays like a reading from Bartlett 's Familiar Quotations 3
But it does n't leave you with much . 1
You could hate it for the same reason . 1
There 's little to recommend Snow Dogs , unless one considers cliched dialogue and perverse escapism a source of high hilarity . 1
Kung Pow is Oedekerk 's realization of his childhood dream to be in a martial-arts flick , and proves that sometimes the dreams of youth should remain just that . 1
The performances are an absolute joy . 4
Fresnadillo has something serious to say about the ways in which extravagant chance can distort our perspective and throw us off the path of good sense . 3
I still like Moonlight Mile , better judgment be damned . 3
A welcome relief from baseball movies that try too hard to be mythic , this one is a sweet and modest and ultimately winning story . 3
a bilingual charmer , just like the woman who inspired it 3
Like a less dizzily gorgeous companion to Mr. Wong 's In the Mood for Love -- very much a Hong Kong movie despite its mainland setting . 2
As inept as big-screen remakes of The Avengers and The Wild Wild West . 1
It 's everything you 'd expect -- but nothing more . 2
Best indie of the year , so far . 4
Hatfield and Hicks make the oddest of couples , and in this sense the movie becomes a study of the gambles of the publishing world , offering a case study that exists apart from all the movie 's political ramifications . 3
It 's like going to a house party and watching the host defend himself against a frothing ex-girlfriend . 1
That the Chuck Norris `` grenade gag '' occurs about 7 times during Windtalkers is a good indication of how serious-minded the film is . 2
The plot is romantic comedy boilerplate from start to finish . 2
It arrives with an impeccable pedigree , mongrel pep , and almost indecipherable plot complications . 2
A film that clearly means to preach exclusively to the converted . 2
While The Importance of Being Earnest offers opportunities for occasional smiles and chuckles , it does n't give us a reason to be in the theater beyond Wilde 's wit and the actors ' performances . 1
The latest vapid actor 's exercise to appropriate the structure of Arthur Schnitzler 's Reigen . 1
More vaudeville show than well-constructed narrative , but on those terms it 's inoffensive and actually rather sweet . 2
Nothing more than a run-of-the-mill action flick . 2
Hampered -- no , paralyzed -- by a self-indulgent script ... that aims for poetry and ends up sounding like satire . 0
Ice Age is the first computer-generated feature cartoon to feel like other movies , and that makes for some glacial pacing early on . 2
There 's very little sense to what 's going on here , but the makers serve up the cliches with considerable dash . 2
Cattaneo should have followed the runaway success of his first film , The Full Monty , with something different . 2
They 're the unnamed , easily substitutable forces that serve as whatever terror the heroes of horror movies try to avoid . 1
It almost feels as if the movie is more interested in entertaining itself than in amusing us . 1
The movie 's progression into rambling incoherence gives new meaning to the phrase ` fatal script error . ' 0
I still like Moonlight Mile , better judgment be damned . 3
A welcome relief from baseball movies that try too hard to be mythic , this one is a sweet and modest and ultimately winning story . 3
a bilingual charmer , just like the woman who inspired it 3
Like a less dizzily gorgeous companion to Mr. Wong 's In the Mood for Love -- very much a Hong Kong movie despite its mainland setting . 2
As inept as big-screen remakes of The Avengers and The Wild Wild West . 1
It 's everything you 'd expect -- but nothing more . 2
Best indie of the year , so far . 4
Hatfield and Hicks make the oddest of couples , and in this sense the movie becomes a study of the gambles of the publishing world , offering a case study that exists apart from all the movie 's political ramifications . 3
It 's like going to a house party and watching the host defend himself against a frothing ex-girlfriend . 1
That the Chuck Norris `` grenade gag '' occurs about 7 times during Windtalkers is a good indication of how serious-minded the film is . 2
The plot is romantic comedy boilerplate from start to finish . 2
It arrives with an impeccable pedigree , mongrel pep , and almost indecipherable plot complications . 2
A film that clearly means to preach exclusively to the converted . 2
I still like Moonlight Mile , better judgment be damned . 3
A welcome relief from baseball movies that try too hard to be mythic , this one is a sweet and modest and ultimately winning story . 3
a bilingual charmer , just like the woman who inspired it 3
Like a less dizzily gorgeous companion to Mr. Wong 's In the Mood for Love -- very much a Hong Kong movie despite its mainland setting . 2
As inept as big-screen remakes of The Avengers and The Wild Wild West . 1
It 's everything you 'd expect -- but nothing more . 2
Best indie of the year , so far . 4
Hatfield and Hicks make the oddest of couples , and in this sense the movie becomes a study of the gambles of the publishing world , offering a case study that exists apart from all the movie 's political ramifications . 3
It 's like going to a house party and watching the host defend himself against a frothing ex-girlfriend . 1
That the Chuck Norris `` grenade gag '' occurs about 7 times during Windtalkers is a good indication of how serious-minded the film is . 2
The plot is romantic comedy boilerplate from start to finish . 2
It arrives with an impeccable pedigree , mongrel pep , and almost indecipherable plot complications . 2
A film that clearly means to preach exclusively to the converted . 2
I still like Moonlight Mile , better judgment be damned . 3
A welcome relief from baseball movies that try too hard to be mythic , this one is a sweet and modest and ultimately winning story . 3
a bilingual charmer , just like the woman who inspired it 3
Like a less dizzily gorgeous companion to Mr. Wong 's In the Mood for Love -- very much a Hong Kong movie despite its mainland setting . 2
As inept as big-screen remakes of The Avengers and The Wild Wild West . 1
It 's everything you 'd expect -- but nothing more . 2
Best indie of the year , so far . 4
Hatfield and Hicks make the oddest of couples , and in this sense the movie becomes a study of the gambles of the publishing world , offering a case study that exists apart from all the movie 's political ramifications . 3
It 's like going to a house party and watching the host defend himself against a frothing ex-girlfriend . 1
That the Chuck Norris `` grenade gag '' occurs about 7 times during Windtalkers is a good indication of how serious-minded the film is . 2
The plot is romantic comedy boilerplate from start to finish . 2
It arrives with an impeccable pedigree , mongrel pep , and almost indecipherable plot complications . 2
A film that clearly means to preach exclusively to the converted . 2

+ 30522
- 0
tutorials/fastnlp_advanced_tutorial/vocab.txt
File diff suppressed because it is too large
View File


Loading…
Cancel
Save