Merge branch 'dev0.5.0' of https://github.com/fastnlp/fastNLP into dev0.5.0

5 years ago · 685e9900e5
--- a/docs/source/fastNLP.core.callback.rst
+++ b/docs/source/fastNLP.core.callback.rst
@@ -2,6 +2,6 @@ fastNLP.core.callback
 =====================

 .. automodule:: fastNLP.core.callback
   :members: Callback, GradientClipCallback, EarlyStopCallback, FitlogCallback, EvaluateCallback, LRScheduler, ControlC, LRFinder, TensorboardCallback, WarmupCallback, SaveModelCallback, EchoCallback, CallbackException, EarlyStopError
   :members: Callback, GradientClipCallback, EarlyStopCallback, FitlogCallback, EvaluateCallback, LRScheduler, ControlC, LRFinder, TensorboardCallback, WarmupCallback, SaveModelCallback, CallbackException, EarlyStopError
   :inherited-members:

--- a/docs/source/fastNLP.io.loader.rst
+++ b/docs/source/fastNLP.io.loader.rst
@@ -2,6 +2,6 @@ fastNLP.io.loader
 =================

 .. automodule:: fastNLP.io.loader
   :members: Loader, YelpLoader, YelpFullLoader, YelpPolarityLoader, IMDBLoader, SSTLoader, SST2Loader, ChnSentiCorpLoader, ConllLoader, Conll2003Loader, Conll2003NERLoader, OntoNotesNERLoader, CTBLoader, MsraNERLoader, PeopleDailyNERLoader, WeiboNERLoader, CSVLoader, JsonLoader, CWSLoader, MNLILoader, QuoraLoader, SNLILoader, QNLILoader, RTELoader, CoReferenceLoader
   :members: Loader, YelpLoader, YelpFullLoader, YelpPolarityLoader, IMDBLoader, SSTLoader, SST2Loader, ChnSentiCorpLoader, THUCNewsLoader, WeiboSenti100kLoader, ConllLoader, Conll2003Loader, Conll2003NERLoader, OntoNotesNERLoader, CTBLoader, MsraNERLoader, PeopleDailyNERLoader, WeiboNERLoader, CSVLoader, JsonLoader, CWSLoader, MNLILoader, QuoraLoader, SNLILoader, QNLILoader, RTELoader, CNXNLILoader, BQCorpusLoader, LCQMCLoader, CoReferenceLoader
   :inherited-members:

--- a/docs/source/fastNLP.io.pipe.rst
+++ b/docs/source/fastNLP.io.pipe.rst
@@ -2,6 +2,6 @@ fastNLP.io.pipe
 ===============

 .. automodule:: fastNLP.io.pipe
   :members: Pipe, CWSPipe, YelpFullPipe, YelpPolarityPipe, SSTPipe, SST2Pipe, IMDBPipe, ChnSentiCorpPipe, Conll2003NERPipe, OntoNotesNERPipe, MsraNERPipe, WeiboNERPipe, PeopleDailyPipe, Conll2003Pipe, MatchingBertPipe, RTEBertPipe, SNLIBertPipe, QuoraBertPipe, QNLIBertPipe, MNLIBertPipe, MatchingPipe, RTEPipe, SNLIPipe, QuoraPipe, QNLIPipe, MNLIPipe, CoReferencePipe
   :members: Pipe, CWSPipe, YelpFullPipe, YelpPolarityPipe, SSTPipe, SST2Pipe, IMDBPipe, ChnSentiCorpPipe, THUCNewsPipe, WeiboSenti100kPipe, Conll2003NERPipe, OntoNotesNERPipe, MsraNERPipe, WeiboNERPipe, PeopleDailyPipe, Conll2003Pipe, MatchingBertPipe, RTEBertPipe, SNLIBertPipe, QuoraBertPipe, QNLIBertPipe, MNLIBertPipe, CNXNLIBertPipe, BQCorpusBertPipe, LCQMCBertPipe, MatchingPipe, RTEPipe, SNLIPipe, QuoraPipe, QNLIPipe, MNLIPipe, LCQMCPipe, CNXNLIPipe, BQCorpusPipe, RenamePipe, GranularizePipe, MachingTruncatePipe, CoReferencePipe
   :inherited-members:

--- a/docs/source/fastNLP.io.rst
+++ b/docs/source/fastNLP.io.rst
@@ -2,7 +2,7 @@ fastNLP.io
 ==========

 .. automodule:: fastNLP.io
   :members: DataBundle, EmbedLoader, Loader, YelpLoader, YelpFullLoader, YelpPolarityLoader, IMDBLoader, SSTLoader, SST2Loader, ChnSentiCorpLoader, ConllLoader, Conll2003Loader, Conll2003NERLoader, OntoNotesNERLoader, CTBLoader, MsraNERLoader, WeiboNERLoader, PeopleDailyNERLoader, CSVLoader, JsonLoader, CWSLoader, MNLILoader, QuoraLoader, SNLILoader, QNLILoader, RTELoader, Pipe, YelpFullPipe, YelpPolarityPipe, SSTPipe, SST2Pipe, IMDBPipe, ChnSentiCorpPipe, Conll2003Pipe, Conll2003NERPipe, OntoNotesNERPipe, MsraNERPipe, PeopleDailyPipe, WeiboNERPipe, CWSPipe, MatchingBertPipe, RTEBertPipe, SNLIBertPipe, QuoraBertPipe, QNLIBertPipe, MNLIBertPipe, MatchingPipe, RTEPipe, SNLIPipe, QuoraPipe, QNLIPipe, MNLIPipe, ModelLoader, ModelSaver
   :members: DataBundle, EmbedLoader, Loader, YelpLoader, YelpFullLoader, YelpPolarityLoader, IMDBLoader, SSTLoader, SST2Loader, ChnSentiCorpLoader, THUCNewsLoader, WeiboSenti100kLoader, ConllLoader, Conll2003Loader, Conll2003NERLoader, OntoNotesNERLoader, CTBLoader, MsraNERLoader, WeiboNERLoader, PeopleDailyNERLoader, CSVLoader, JsonLoader, CWSLoader, MNLILoader, QuoraLoader, SNLILoader, QNLILoader, RTELoader, CNXNLILoader, BQCorpusLoader, LCQMCLoader, Pipe, YelpFullPipe, YelpPolarityPipe, SSTPipe, SST2Pipe, IMDBPipe, ChnSentiCorpPipe, THUCNewsPipe, WeiboSenti100kPipe, Conll2003Pipe, Conll2003NERPipe, OntoNotesNERPipe, MsraNERPipe, PeopleDailyPipe, WeiboNERPipe, CWSPipe, MatchingBertPipe, RTEBertPipe, SNLIBertPipe, QuoraBertPipe, QNLIBertPipe, MNLIBertPipe, MatchingPipe, RTEPipe, SNLIPipe, QuoraPipe, QNLIPipe, MNLIPipe, ModelLoader, ModelSaver
   :inherited-members:

 子模块
--- a/docs/source/fastNLP.rst
+++ b/docs/source/fastNLP.rst
@@ -2,7 +2,7 @@ fastNLP
 =======

 .. automodule:: fastNLP
   :members: Instance, FieldArray, DataSetIter, BatchIter, TorchLoaderIter, Vocabulary, DataSet, Const, Trainer, Tester, Callback, GradientClipCallback, EarlyStopCallback, FitlogCallback, EvaluateCallback, LRScheduler, ControlC, LRFinder, TensorboardCallback, WarmupCallback, SaveModelCallback, EchoCallback, CallbackException, EarlyStopError, Padder, AutoPadder, EngChar2DPadder, AccuracyMetric, SpanFPreRecMetric, ExtractiveQAMetric, Optimizer, SGD, Adam, AdamW, Sampler, SequentialSampler, BucketSampler, RandomSampler, LossFunc, CrossEntropyLoss, L1Loss, BCELoss, NLLLoss, LossInForward, cache_results, logger
   :members: Instance, FieldArray, DataSetIter, BatchIter, TorchLoaderIter, Vocabulary, DataSet, Const, Trainer, Tester, Callback, GradientClipCallback, EarlyStopCallback, FitlogCallback, EvaluateCallback, LRScheduler, ControlC, LRFinder, TensorboardCallback, WarmupCallback, SaveModelCallback, CallbackException, EarlyStopError, Padder, AutoPadder, EngChar2DPadder, AccuracyMetric, SpanFPreRecMetric, ExtractiveQAMetric, Optimizer, SGD, Adam, AdamW, Sampler, SequentialSampler, BucketSampler, RandomSampler, LossFunc, CrossEntropyLoss, L1Loss, BCELoss, NLLLoss, LossInForward, cache_results, logger
   :inherited-members:

 子模块
--- a/docs/source/tutorials/tutorial_10_callback.rst
+++ b/docs/source/tutorials/tutorial_10_callback.rst
@@ -1,67 +1,132 @@
 ===================================================
 使用Callback自定义你的训练过程
 使用 Callback 自定义你的训练过程
 ===================================================

 在训练时，我们常常要使用trick来提高模型的性能（如调节学习率），或者要打印训练中的信息。
 这里我们提供Callback类，在Trainer中插入代码，完成一些自定义的操作。
 - 什么是 Callback
 - 使用 Callback 
 - 一些常用的 Callback
 - 自定义实现 Callback

 我们使用和 :doc:`/user/quickstart` 中一样的任务来进行详细的介绍。
 给出一段评价性文字，预测其情感倾向是积极（label=1）、消极（label=0）还是中性（label=2），使用 :class:`~fastNLP.Trainer`  和  :class:`~fastNLP.Tester`  来进行快速训练和测试。
 关于数据处理，Loss和Optimizer的选择可以看其他教程，这里仅在训练时加入学习率衰减。

 什么是Callback
 ---------------------
 Callback的构建和使用

 Callback 是与 Trainer 紧密结合的模块，利用 Callback 可以在 Trainer 训练时，加入自定义的操作，比如梯度裁剪，学习率调节，测试模型的性能等。定义的 Callback 会在训练的特定阶段被调用。

 fastNLP 中提供了很多常用的 Callback ，开箱即用。


 使用 Callback
 ---------------------

 创建Callback
    我们可以继承fastNLP :class:`~fastNLP.Callback` 类来定义自己的Callback。
    这里我们实现一个让学习率线性衰减的Callback。
 使用 Callback 很简单，将需要的 callback 按 list 存储，以对应参数 ``callbacks`` 传入对应的 Trainer。Trainer 在训练时就会自动执行这些 Callback 指定的操作了。


 .. code-block:: python

    from fastNLP import (Callback, EarlyStopCallback,
                         Trainer, CrossEntropyLoss, AccuracyMetric)
    from fastNLP.models import CNNText
    import torch.cuda

    # prepare data
    def get_data():
        """Load ChnSentiCorp and return (train, dev, test, word vocab, target vocab)."""
        from fastNLP.io import ChnSentiCorpPipe as pipe
        bundle = pipe().process_from_file()
        print(bundle)
        # rename the 'chars' field to 'words' (presumably the field name CNNText expects)
        bundle.rename_field('chars', 'words')
        datasets, vocabs = bundle.datasets, bundle.vocabs
        return (datasets['train'], datasets['dev'], datasets['test'],
                vocabs['words'], vocabs['target'])

    # prepare model
    train_data, dev_data, _, vocab, tgt_vocab = get_data()
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    model = CNNText((len(vocab),50), num_classes=len(tgt_vocab))

    # define callback
    callbacks=[EarlyStopCallback(5)]

    # pass callbacks to Trainer
    def train_with_callback(cb_list):
        """Train the module-level ``model`` for 3 epochs, running the callbacks in ``cb_list``."""
        trainer_kwargs = dict(
            device=device,
            n_epochs=3,
            model=model,
            train_data=train_data,
            dev_data=dev_data,
            loss=CrossEntropyLoss(),
            metrics=AccuracyMetric(),
            # the callbacks are handed to the Trainer through its ``callbacks`` argument
            callbacks=cb_list,
            check_code_level=-1,
        )
        Trainer(**trainer_kwargs).train()

    .. code-block:: python
    train_with_callback(callbacks)

        import fastNLP

        class LRDecay(fastNLP.Callback):
            def __init__(self):
                super(LRDecay, self).__init__()
                self.base_lrs = []
                self.delta = []

            def on_train_begin(self):
                # 初始化，仅训练开始时调用
                self.base_lrs = [pg['lr'] for pg in self.optimizer.param_groups]
                self.delta = [float(lr) / self.n_epochs for lr in self.base_lrs]
 fastNLP 中的 Callback
 ---------------------

            def on_epoch_end(self):
                # 每个epoch结束时，更新学习率
                ep = self.epoch
                lrs = [lr - d * ep for lr, d in zip(self.base_lrs, self.delta)]
                self.change_lr(lrs)
 fastNLP 中提供了很多常用的 Callback，如梯度裁剪，训练时早停和测试验证集，fitlog 等等。具体 Callback 请参考 :mod:`fastNLP.core.callback` 。

            def change_lr(self, lrs):
                for pg, lr in zip(self.optimizer.param_groups, lrs):
                    pg['lr'] = lr
 .. code-block:: python

    这里，:class:`~fastNLP.Callback` 中所有以 ``on_`` 开头的类方法会在 :class:`~fastNLP.Trainer` 的训练中在特定时间调用。
    如 on_train_begin() 会在训练开始时被调用，on_epoch_end() 会在每个 epoch 结束时调用。
    具体有哪些类方法，参见文档 :class:`~fastNLP.Callback` 。
    from fastNLP import EarlyStopCallback, GradientClipCallback, EvaluateCallback
    callbacks = [
        EarlyStopCallback(5),
        GradientClipCallback(clip_value=5, clip_type='value'),
        EvaluateCallback(dev_data)
    ]

    另外，为了使用方便，可以在 :class:`~fastNLP.Callback` 内部访问 :class:`~fastNLP.Trainer` 中的属性，如 optimizer, epoch, step，分别对应训练时的优化器，当前epoch数，和当前的总step数。
    具体可访问的属性，参见文档 :class:`~fastNLP.Callback` 。
    train_with_callback(callbacks)

 使用Callback
    在定义好 :class:`~fastNLP.Callback` 之后，就能将它传入Trainer的 ``callbacks`` 参数，在实际训练时使用。
 自定义 Callback
 ---------------------

    .. code-block:: python
 这里我们以一个简单的 Callback 作为例子，它的作用是打印每一个 epoch 的平均训练 loss。

        """
        数据预处理，模型定义等等
        """
 1. 创建 Callback
    
    要自定义 Callback，我们要实现一个类，继承 fastNLP.Callback。这里我们定义 MyCallBack ，继承 fastNLP.Callback 。

        trainer = fastNLP.Trainer(
            model=model, train_data=train_data, dev_data=dev_data,
            optimizer=optimizer, metrics=metrics,
            batch_size=10, n_epochs=100,
            callbacks=[LRDecay()])
 2. 指定 Callback 调用的阶段
    
    Callback 中所有以 ``on_`` 开头的类方法会在 Trainer 的训练中在特定阶段调用。 如 on_train_begin() 会在训练开始时被调用，on_epoch_end()
    会在每个 epoch 结束时调用。 具体有哪些类方法，参见 Callback 文档。这里， MyCallBack 在求得loss时调用 on_backward_begin() 记录
    当前 loss，在每一个 epoch 结束时调用 on_epoch_end() ，求当前 epoch 平均loss并输出。

 3. 使用 Callback 的属性访问 Trainer 的内部信息
    
    为了方便使用，可以使用 Callback 的属性，访问 Trainer 中的对应信息，如 optimizer, epoch, n_epochs，分别对应训练时的优化器，
    当前 epoch 数，和总 epoch 数。 具体可访问的属性，参见文档 Callback 。这里， MyCallBack 为了求平均 loss ，需要知道当前 epoch 的总步
    数，可以通过 self.step 属性得到当前训练了多少步。

 .. code-block:: python

    from fastNLP import Callback
    from fastNLP import logger

    class MyCallBack(Callback):
        """Print the average training loss of each epoch.

        The loss of every step is accumulated in ``on_backward_begin``; the
        per-epoch average is logged, and the accumulator reset, in
        ``on_epoch_end``.
        """
        def __init__(self):
            super().__init__()
            self.total_loss = 0  # sum of the step losses seen in the current epoch
            self.start_step = 0  # value of self.step when the current epoch started

        def on_backward_begin(self, loss):
            # accumulate the scalar loss of the current step
            self.total_loss += loss.item()

        def on_epoch_end(self):
            n_steps = self.step - self.start_step
            avg_loss = self.total_loss / n_steps
            logger.info('Avg loss at epoch %d, %.6f', self.epoch, avg_loss)
            # Reset the per-epoch state. Without clearing total_loss, later epochs
            # would divide a loss summed over all epochs by one epoch's step count.
            self.total_loss = 0
            self.start_step = self.step

    callbacks = [MyCallBack()]
    train_with_callback(callbacks)

        trainer.train()
--- a/docs/source/tutorials/tutorial_2_vocabulary.rst
+++ b/docs/source/tutorials/tutorial_2_vocabulary.rst
@@ -86,7 +86,7 @@ Vocabulary
    vocab.from_dataset(tr_data, field_name='chars', no_create_entry_dataset=[dev_data])


 :class:`~fastNLP.Vocabulary` 中的 `no_create_entry` , 建议在添加来自于测试集和验证集的词的时候将该参数置为True, 或将验证集和测试集
 :class:`~fastNLP.Vocabulary` 中的 `no_create_entry` , 建议在添加来自于测试集和验证集的词的时候将该参数置为True, 或将验证集和测试集
 传入 `no_create_entry_dataset` 参数。它们的意义是在接下来的模型会使用pretrain的embedding(包括glove, word2vec, elmo与bert)且会finetune的
 情况下，如果仅使用来自于train的数据建立vocabulary，会导致只出现在test与dev中的词语无法充分利用到来自于预训练embedding的信息(因为他们
 会被认为是unk)，所以在建立词表的时候将test与dev考虑进来会使得最终的结果更好。通过与fastNLP中的各种Embedding配合使用，会有如下的效果，
--- a/docs/source/tutorials/tutorial_3_embedding.rst
+++ b/docs/source/tutorials/tutorial_3_embedding.rst
@@ -187,7 +187,7 @@ BertEmbedding的使用
    torch.Size([1, 7, 768])

 在英文Bert模型中，一个英文单词可能会被切分为多个subword，例如"fairness"会被拆分为 ``["fair", "##ness"]`` ，这样一个word对应的将有两个输出，
 :class:`~fastNLP.embeddings.BertEmbedding` 会使用pooling方法将一个word的subword的表示合并成一个vector，通过pool_method可以控制
 :class:`~fastNLP.embeddings.BertEmbedding` 会使用pooling方法将一个word的subword的表示合并成一个vector，通过pool_method可以控制
 该pooling方法，支持的有"first"(即使用fair的表示作为fairness的表示), "last"(使用##ness的表示作为fairness的表示), "max"(对fair和
 ##ness在每一维上做max),"avg"(对fair和##ness每一维做average)。

@@ -200,8 +200,8 @@ BertEmbedding的使用

    torch.Size([1, 5, 768])

 另外，根据 `BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding
 <https://arxiv.org/abs/1810.04805>`_ ，Bert在针对具有两句话的任务时（如matching，Q&A任务），句子之间通过[SEP]拼接起来，前一句话的token embedding为0，
 另外，根据 `BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding <https://arxiv.org/abs/1810.04805>`_ ，
 Bert在针对具有两句话的任务时（如matching，Q&A任务），句子之间通过[SEP]拼接起来，前一句话的token embedding为0，
 后一句话的token embedding为1。BertEmbedding能够自动识别句子中间的[SEP]来正确设置对应的token_type_id的。

 .. code-block:: python
@@ -230,7 +230,7 @@ Part VI: 使用character-level的embedding
 -----------------------------------------------------

 除了预训练的embedding以外，fastNLP还提供了两种Character Embedding： :class:`~fastNLP.embeddings.CNNCharEmbedding` 和
 :class:`~fastNLP.embeddings.LSTMCharEmbedding` 。一般在使用character embedding时，需要在预处理的时候将word拆分成character，这
 :class:`~fastNLP.embeddings.LSTMCharEmbedding` 。一般在使用character embedding时，需要在预处理的时候将word拆分成character，这
 会使得预处理过程变得非常繁琐。在fastNLP中，使用character embedding也只需要传入 :class:`~fastNLP.Vocabulary` 即可，而且该
 Vocabulary与其它Embedding使用的Vocabulary是一致的，下面我们看两个例子。

@@ -298,11 +298,12 @@ Part VII: 叠加使用多个embedding

    torch.Size([1, 5, 114])

 :class:`~fastNLP.embeddings.StaticEmbedding` , :class:`~fastNLP.embeddings.ElmoEmbedding` ,
 :class:`~fastNLP.embeddings.CNNCharEmbedding` , :class:`~fastNLP.embeddings.BertEmbedding` 等都可以互相拼接。
 :class:`~fastNLP.embeddings.StackEmbedding` 的使用也是和其它Embedding是一致的，即输出index返回对应的表示。但能够拼接起来的Embedding
 :class:`~fastNLP.embeddings.StaticEmbedding` , :class:`~fastNLP.embeddings.ElmoEmbedding` ,
 :class:`~fastNLP.embeddings.CNNCharEmbedding` , :class:`~fastNLP.embeddings.BertEmbedding` 等都可以互相拼接。
 :class:`~fastNLP.embeddings.StackEmbedding` 的使用也是和其它Embedding是一致的，即输出index返回对应的表示。但能够拼接起来的Embedding
 必须使用同样的 :class:`~fastNLP.Vocabulary` ，因为只有使用同样的 :class:`~fastNLP.Vocabulary` 才能保证同一个index指向的是同一个词或字


 -----------------------------------------------------------
 Part VIII: Embedding的其它说明
 -----------------------------------------------------------
--- a/docs/source/tutorials/tutorial_4_load_dataset.rst
+++ b/docs/source/tutorials/tutorial_4_load_dataset.rst
@@ -20,7 +20,7 @@ Part I: 数据集容器DataBundle
 来承载同一个任务的多个数据集 :class:`~fastNLP.DataSet` 以及它们的词表 :class:`~fastNLP.Vocabulary` 。下面会有例子介绍 :class:`~fastNLP.io.DataBundle`
 的相关使用。

 :class:`~fastNLP.io.DataBundle` 在fastNLP中主要在各个 :class:`~fastNLP.io.Loader` 和 :class:`~fastNLP.io.Pipe` 中被使用。
 :class:`~fastNLP.io.DataBundle` 在fastNLP中主要在各个 :class:`~fastNLP.io.Loader` 和 :class:`~fastNLP.io.Pipe` 中被使用。
 下面我们先介绍一下 :class:`~fastNLP.io.Loader` 和 :class:`~fastNLP.io.Pipe` 。

 Part II: 加载的各种数据集的Loader
--- a/fastNLP/io/init.py
+++ b/fastNLP/io/init.py
@@ -47,7 +47,7 @@ __all__ = [
    "SNLILoader",
    "QNLILoader",
    "RTELoader",
    "XNLILoader",
    "CNXNLILoader",
    "BQCorpusLoader",
    "LCQMCLoader",

@@ -70,32 +70,61 @@ __all__ = [
    "WeiboNERPipe",

    "CWSPipe",

    
    "Pipe",
    
    "CWSPipe",
    
    "YelpFullPipe",
    "YelpPolarityPipe",
    "SSTPipe",
    "SST2Pipe",
    "IMDBPipe",
    "ChnSentiCorpPipe",
    "THUCNewsPipe",
    "WeiboSenti100kPipe",
    
    "Conll2003NERPipe",
    "OntoNotesNERPipe",
    "MsraNERPipe",
    "WeiboNERPipe",
    "PeopleDailyPipe",
    "Conll2003Pipe",
    
    "MatchingBertPipe",
    "RTEBertPipe",
    "SNLIBertPipe",
    "QuoraBertPipe",
    "QNLIBertPipe",
    "MNLIBertPipe",
    "CNXNLIBertPipe",
    "BQCorpusBertPipe",
    "LCQMCBertPipe",
    "MatchingPipe",
    "RTEPipe",
    "SNLIPipe",
    "QuoraPipe",
    "QNLIPipe",
    "MNLIPipe",
    "LCQMCPipe",
    "CNXNLIPipe",
    "BQCorpusPipe",
    "RenamePipe",
    "GranularizePipe",
    "MachingTruncatePipe",

    'ModelLoader',
    'ModelSaver',

 ]

 from .embed_loader import EmbedLoader
 from .data_bundle import DataBundle
 from .model_io import ModelLoader, ModelSaver
 import sys

 from .data_bundle import DataBundle
 from .embed_loader import EmbedLoader
 from .loader import *
 from .model_io import ModelLoader, ModelSaver
 from .pipe import *

 import sys
 from ..doc_utils import doc_process

 doc_process(sys.modules[__name__])
--- a/fastNLP/io/loader/init.py
+++ b/fastNLP/io/loader/init.py
@@ -54,7 +54,9 @@ __all__ = [
    'SSTLoader',
    'SST2Loader',
    "ChnSentiCorpLoader",

    "THUCNewsLoader",
    "WeiboSenti100kLoader",
    
    'ConllLoader',
    'Conll2003Loader',
    'Conll2003NERLoader',
@@ -63,26 +65,31 @@ __all__ = [
    "MsraNERLoader",
    "PeopleDailyNERLoader",
    "WeiboNERLoader",

    
    'CSVLoader',
    'JsonLoader',

    
    'CWSLoader',

    
    'MNLILoader',
    "QuoraLoader",
    "SNLILoader",
    "QNLILoader",
    "RTELoader",

    "CNXNLILoader",
    "BQCorpusLoader",
    "LCQMCLoader",
    
    "CoReferenceLoader"
 ]
 from .classification import YelpLoader, YelpFullLoader, YelpPolarityLoader, IMDBLoader, SSTLoader, SST2Loader, ChnSentiCorpLoader
 from .classification import YelpLoader, YelpFullLoader, YelpPolarityLoader, IMDBLoader, SSTLoader, SST2Loader, \
    ChnSentiCorpLoader, THUCNewsLoader, WeiboSenti100kLoader
 from .conll import ConllLoader, Conll2003Loader, Conll2003NERLoader, OntoNotesNERLoader, CTBLoader
 from .conll import MsraNERLoader, PeopleDailyNERLoader, WeiboNERLoader
 from .coreference import CoReferenceLoader
 from .csv import CSVLoader
 from .cws import CWSLoader
 from .json import JsonLoader
 from .loader import Loader
 from .matching import MNLILoader, QuoraLoader, SNLILoader, QNLILoader, RTELoader
 from .conll import MsraNERLoader, PeopleDailyNERLoader, WeiboNERLoader
 from .coreference import CoReferenceLoader
 from .matching import MNLILoader, QuoraLoader, SNLILoader, QNLILoader, RTELoader, CNXNLILoader, BQCorpusLoader, \
    LCQMCLoader
--- a/fastNLP/io/loader/classification.py
+++ b/fastNLP/io/loader/classification.py
@@ -409,6 +409,7 @@ class THUCNewsLoader(Loader):

    .. csv-table::
       :header: "raw_words", "target"
       
       "马晓旭意外受伤让国奥警惕 无奈大雨格外青睐殷家军记者傅亚雨沈阳报道 ... ", "体育"
       "...", "..."

@@ -446,13 +447,18 @@ class WeiboSenti100kLoader(Loader):
    别名：
    数据集简介：微博sentiment classification，二分类
    原始数据内容为：
    label   text
    0   六一出生的？好讽刺…… //@祭春姬:他爸爸是外星人吧 //@面孔小高:现在的孩子都怎么了 [怒][怒][怒]
    1   听过一场！笑死了昂，一听茄子脱口秀，从此节操是路人！[嘻嘻] //@中国梦网官微:@Pencil彭赛 @茄子脱口秀 [圣诞帽][圣诞树][平安果]
    
    .. code-block:: text
    
        label   text
        0   六一出生的？好讽刺…… //@祭春姬:他爸爸是外星人吧 //@面孔小高:现在的孩子都怎么了 [怒][怒][怒]
        1   听过一场！笑死了昂，一听茄子脱口秀，从此节操是路人！[嘻嘻] //@中国梦网官微:@Pencil彭赛 @茄子脱口秀 [圣诞帽][圣诞树][平安果]
    
    读取后的Dataset将具有以下数据结构：

    .. csv-table::
       :header: "raw_chars", "target"
       
       "六一出生的？好讽刺…… //@祭春姬:他爸爸是外星人吧 //@面孔小高:现在的孩子都怎么了 [怒][怒][怒]", "0"
       "...", "..."

--- a/fastNLP/io/loader/matching.py
+++ b/fastNLP/io/loader/matching.py
@@ -15,14 +15,14 @@ import os
 import warnings
 from typing import Union, Dict

 from .csv import CSVLoader
 from .json import JsonLoader
 from .loader import Loader
 from .. import DataBundle
 from ..utils import check_loader_paths
 from ...core.const import Const
 from ...core.dataset import DataSet
 from ...core.instance import Instance
 from .csv import CSVLoader
 from ..utils import check_loader_paths


 class MNLILoader(Loader):
@@ -348,8 +348,9 @@ class CNXNLILoader(Loader):

    .. csv-table::
       :header: "raw_chars1", "raw_chars2", "target"
       
       "从概念上看,奶油收入有两个基本方面产品和地理.", "产品和地理是什么使奶油抹霜工作.", "1"
       ""...", "...", "..."
       "...", "...", "..."

    """

@@ -412,6 +413,7 @@ class BQCorpusLoader(Loader):

    .. csv-table::
       :header: "raw_chars1", "raw_chars2", "target"
       
       "不是邀请的如何贷款？", "我不是你们邀请的客人可以贷款吗？", "1"
       "如何满足微粒银行的审核", "建设银行有微粒贷的资格吗", "0"
       "...", "...", "..."
@@ -448,20 +450,26 @@ class BQCorpusLoader(Loader):


 class LCQMCLoader(Loader):
    """
    别名：
    r"""
    数据集简介：句对匹配（question matching）
    
    原始数据为：
    '喜欢打篮球的男生喜欢什么样的女生\t爱打篮球的男生喜欢什么样的女生\t1\n'
    '晚上睡觉带着耳机听音乐有什么害处吗？\t孕妇可以戴耳机听音乐吗?\t0\n'
    读取后的Dataset将具有以下的数据结构：

    
    .. code-block:: text
    
        '喜欢打篮球的男生喜欢什么样的女生\t爱打篮球的男生喜欢什么样的女生\t1\n'
        '晚上睡觉带着耳机听音乐有什么害处吗？\t孕妇可以戴耳机听音乐吗?\t0\n'
    
    读取后的Dataset将具有以下的数据结构
    
    .. csv-table::
       :header: "raw_chars1", "raw_chars2", "target"
       
       "喜欢打篮球的男生喜欢什么样的女生？", "爱打篮球的男生喜欢什么样的女生？", "1"
       "晚上睡觉带着耳机听音乐有什么害处吗？", "孕妇可以戴耳机听音乐吗?", "0"
       ""...", "...", "..."

       "...", "...", "..."
    
    
    """

    def __init__(self):
--- a/fastNLP/io/pipe/init.py
+++ b/fastNLP/io/pipe/init.py
@@ -9,9 +9,9 @@ Pipe用于处理通过 Loader 读取的数据，所有的 Pipe 都包含 ``proce
 """
 __all__ = [
    "Pipe",

    
    "CWSPipe",

    
    "YelpFullPipe",
    "YelpPolarityPipe",
    "SSTPipe",
@@ -20,35 +20,46 @@ __all__ = [
    "ChnSentiCorpPipe",
    "THUCNewsPipe",
    "WeiboSenti100kPipe",

    
    "Conll2003NERPipe",
    "OntoNotesNERPipe",
    "MsraNERPipe",
    "WeiboNERPipe",
    "PeopleDailyPipe",
    "Conll2003Pipe",

    
    "MatchingBertPipe",
    "RTEBertPipe",
    "SNLIBertPipe",
    "QuoraBertPipe",
    "QNLIBertPipe",
    "MNLIBertPipe",
    "CNXNLIBertPipe",
    "BQCorpusBertPipe",
    "LCQMCBertPipe",
    "MatchingPipe",
    "RTEPipe",
    "SNLIPipe",
    "QuoraPipe",
    "QNLIPipe",
    "MNLIPipe",

    "LCQMCPipe",
    "CNXNLIPipe",
    "BQCorpusPipe",
    "RenamePipe",
    "GranularizePipe",
    "MachingTruncatePipe",
    
    "CoReferencePipe"
 ]

 from .classification import YelpFullPipe, YelpPolarityPipe, SSTPipe, SST2Pipe, IMDBPipe, ChnSentiCorpPipe, THUCNewsPipe, WeiboSenti100kPipe
 from .classification import YelpFullPipe, YelpPolarityPipe, SSTPipe, SST2Pipe, IMDBPipe, ChnSentiCorpPipe, THUCNewsPipe, \
    WeiboSenti100kPipe
 from .conll import Conll2003NERPipe, OntoNotesNERPipe, MsraNERPipe, WeiboNERPipe, PeopleDailyPipe
 from .matching import MatchingBertPipe, RTEBertPipe, SNLIBertPipe, QuoraBertPipe, QNLIBertPipe, MNLIBertPipe, \
    MatchingPipe, RTEPipe, SNLIPipe, QuoraPipe, QNLIPipe, MNLIPipe
 from .pipe import Pipe
 from .conll import Conll2003Pipe
 from .cws import CWSPipe
 from .coreference import CoReferencePipe
 from .cws import CWSPipe
 from .matching import MatchingBertPipe, RTEBertPipe, SNLIBertPipe, QuoraBertPipe, QNLIBertPipe, MNLIBertPipe, \
    MatchingPipe, RTEPipe, SNLIPipe, QuoraPipe, QNLIPipe, MNLIPipe, CNXNLIBertPipe, CNXNLIPipe, BQCorpusBertPipe, \
    LCQMCPipe, BQCorpusPipe, LCQMCBertPipe, RenamePipe, GranularizePipe, MachingTruncatePipe
 from .pipe import Pipe
--- a/fastNLP/io/pipe/classification.py
+++ b/fastNLP/io/pipe/classification.py
@@ -21,11 +21,11 @@ from .utils import get_tokenizer, _indexize, _add_words_field, _drop_empty_insta
 from ..data_bundle import DataBundle
 from ..loader.classification import ChnSentiCorpLoader, THUCNewsLoader, WeiboSenti100kLoader
 from ..loader.classification import IMDBLoader, YelpFullLoader, SSTLoader, SST2Loader, YelpPolarityLoader
 from ...core._logger import logger
 from ...core.const import Const
 from ...core.dataset import DataSet
 from ...core.instance import Instance
 from ...core.vocabulary import Vocabulary
 from ...core._logger import logger

 nonalpnum = re.compile('[^0-9a-zA-Z?!\']+')

@@ -718,6 +718,7 @@ class THUCNewsPipe(_CLSPipe):

        .. csv-table::
            :header: "raw_words", "target"
            
            "马晓旭意外受伤让国奥警惕 无奈大雨格外青睐殷家军记者傅亚雨沈阳报道 ... ", "体育"
            "...", "..."

@@ -826,6 +827,7 @@ class WeiboSenti100kPipe(_CLSPipe):

        .. csv-table::
            :header: "raw_chars", "target"
            
            "六一出生的？好讽刺…… //@祭春姬:他爸爸是外星人吧 //@面孔小高:现在的孩子都怎么了 [怒][怒][怒]", "0"
            "...", "..."

--- a/fastNLP/io/pipe/matching.py
+++ b/fastNLP/io/pipe/matching.py
@@ -16,20 +16,24 @@ __all__ = [
    "QuoraPipe",
    "QNLIPipe",
    "MNLIPipe",
    "LCQMCPipe",
    "CNXNLIPipe",
    "BQCorpusPipe",
    "LCQMCPipe",
    "RenamePipe",
    "GranularizePipe",
    "MachingTruncatePipe",
 ]

 import warnings

 from .pipe import Pipe
 from .utils import get_tokenizer
 from ..loader.matching import SNLILoader, MNLILoader, QNLILoader, RTELoader, QuoraLoader, BQCorpusLoader, CNXNLILoader, LCQMCLoader
 from ..data_bundle import DataBundle
 from ..loader.matching import SNLILoader, MNLILoader, QNLILoader, RTELoader, QuoraLoader, BQCorpusLoader, CNXNLILoader, \
    LCQMCLoader
 from ...core._logger import logger
 from ...core.const import Const
 from ...core.vocabulary import Vocabulary
 from ...core._logger import logger
 from ..data_bundle import DataBundle


 class MatchingBertPipe(Pipe):
@@ -145,7 +149,7 @@ class MatchingBertPipe(Pipe):
                       f"data set but not in train data set!."
            warnings.warn(warn_msg)
            logger.warning(warn_msg)

        
        has_target_datasets = [dataset for name, dataset in data_bundle.datasets.items() if
                               dataset.has_field(Const.TARGET)]
        target_vocab.index_dataset(*has_target_datasets, field_name=Const.TARGET)
@@ -294,7 +298,7 @@ class MatchingPipe(Pipe):
                       f"data set but not in train data set!."
            warnings.warn(warn_msg)
            logger.warning(warn_msg)

        
        has_target_datasets = [dataset for name, dataset in data_bundle.datasets.items() if
                               dataset.has_field(Const.TARGET)]
        target_vocab.index_dataset(*has_target_datasets, field_name=Const.TARGET)
@@ -345,8 +349,9 @@ class MNLIPipe(MatchingPipe):
        data_bundle = MNLILoader().load(paths)
        return self.process(data_bundle)


 class LCQMCPipe(MatchingPipe):
    def process_from_file(self, paths = None):
    def process_from_file(self, paths=None):
        data_bundle = LCQMCLoader().load(paths)
        data_bundle = RenamePipe().process(data_bundle)
        data_bundle = self.process(data_bundle)
@@ -358,14 +363,14 @@ class CNXNLIPipe(MatchingPipe):
    def process_from_file(self, paths=None):
        data_bundle = CNXNLILoader().load(paths)
        data_bundle = GranularizePipe(task='XNLI').process(data_bundle)
        data_bundle = RenamePipe().process(data_bundle) #使中文数据的field
        data_bundle = RenamePipe().process(data_bundle)  # 使中文数据的field
        data_bundle = self.process(data_bundle)
        data_bundle = RenamePipe().process(data_bundle)
        return data_bundle


 class BQCorpusPipe(MatchingPipe):
    def process_from_file(self, paths = None):
    def process_from_file(self, paths=None):
        data_bundle = BQCorpusLoader().load(paths)
        data_bundle = RenamePipe().process(data_bundle)
        data_bundle = self.process(data_bundle)
@@ -374,12 +379,12 @@ class BQCorpusPipe(MatchingPipe):


 class RenamePipe(Pipe):
    def __init__(self, task = 'cn-nli'):
    def __init__(self, task='cn-nli'):
        super().__init__()
        self.task = task

    
    def process(self, data_bundle: DataBundle):  # rename field name for Chinese Matching dataset
        if(self.task == 'cn-nli'):
        if (self.task == 'cn-nli'):
            for name, dataset in data_bundle.datasets.items():
                if (dataset.has_field(Const.RAW_CHARS(0))):
                    dataset.rename_field(Const.RAW_CHARS(0), Const.RAW_WORDS(0))  # RAW_CHARS->RAW_WORDS
@@ -392,12 +397,12 @@ class RenamePipe(Pipe):
                else:
                    raise RuntimeError(
                        "field name of dataset is not qualified. It should have ether RAW_CHARS or WORDS")
        elif(self.task == 'cn-nli-bert'):
        elif (self.task == 'cn-nli-bert'):
            for name, dataset in data_bundle.datasets.items():
                if (dataset.has_field(Const.RAW_CHARS(0))):
                    dataset.rename_field(Const.RAW_CHARS(0), Const.RAW_WORDS(0))  # RAW_CHARS->RAW_WORDS
                    dataset.rename_field(Const.RAW_CHARS(1), Const.RAW_WORDS(1))
                elif(dataset.has_field(Const.RAW_WORDS(0))):
                elif (dataset.has_field(Const.RAW_WORDS(0))):
                    dataset.rename_field(Const.RAW_WORDS(0), Const.RAW_CHARS(0))
                    dataset.rename_field(Const.RAW_WORDS(1), Const.RAW_CHARS(1))
                    dataset.rename_field(Const.INPUT, Const.CHAR_INPUT)
@@ -409,15 +414,15 @@ class RenamePipe(Pipe):
            raise RuntimeError(
                "Only support task='cn-nli' or 'cn-nli-bert'"
            )

        
        return data_bundle


 class GranularizePipe(Pipe):
    def __init__(self, task = None):
    def __init__(self, task=None):
        super().__init__()
        self.task = task

    
    def _granularize(self, data_bundle, tag_map):
        """
        该函数对data_bundle中'target'列中的内容进行转换。
@@ -434,21 +439,22 @@ class GranularizePipe(Pipe):
            dataset.drop(lambda ins: ins[Const.TARGET] == -100)
            data_bundle.set_dataset(dataset, name)
        return data_bundle

    
    def process(self, data_bundle: DataBundle):
        task_tag_dict = {
            'XNLI':{'neutral': 0, 'entailment': 1, 'contradictory': 2, 'contradiction': 2}
            'XNLI': {'neutral': 0, 'entailment': 1, 'contradictory': 2, 'contradiction': 2}
        }
        if self.task in task_tag_dict:
            data_bundle = self._granularize(data_bundle=data_bundle, tag_map= task_tag_dict[self.task])
            data_bundle = self._granularize(data_bundle=data_bundle, tag_map=task_tag_dict[self.task])
        else:
            raise RuntimeError(f"Only support {task_tag_dict.keys()} task_tag_map.")
        return data_bundle


 class MachingTruncatePipe(Pipe): #truncate sentence for bert, modify seq_len
 class MachingTruncatePipe(Pipe):  # truncate sentence for bert, modify seq_len
    def __init__(self):
        super().__init__()
    
    def process(self, data_bundle: DataBundle):
        for name, dataset in data_bundle.datasets.items():
            pass
@@ -456,7 +462,7 @@ class MachingTruncatePipe(Pipe): #truncate sentence for bert, modify seq_len


 class LCQMCBertPipe(MatchingBertPipe):
    def process_from_file(self, paths = None):
    def process_from_file(self, paths=None):
        data_bundle = LCQMCLoader().load(paths)
        data_bundle = RenamePipe(task='cn-nli-bert').process(data_bundle)
        data_bundle = self.process(data_bundle)
@@ -465,7 +471,7 @@ class LCQMCBertPipe(MatchingBertPipe):


 class BQCorpusBertPipe(MatchingBertPipe):
    def process_from_file(self, paths = None):
    def process_from_file(self, paths=None):
        data_bundle = BQCorpusLoader().load(paths)
        data_bundle = RenamePipe(task='cn-nli-bert').process(data_bundle)
        data_bundle = self.process(data_bundle)
@@ -474,7 +480,7 @@ class BQCorpusBertPipe(MatchingBertPipe):


 class CNXNLIBertPipe(MatchingBertPipe):
    def process_from_file(self, paths = None):
    def process_from_file(self, paths=None):
        data_bundle = CNXNLILoader().load(paths)
        data_bundle = GranularizePipe(task='XNLI').process(data_bundle)
        data_bundle = RenamePipe(task='cn-nli-bert').process(data_bundle)
--- a/fastNLP/modules/encoder/attention.py
+++ b/fastNLP/modules/encoder/attention.py
@@ -152,8 +152,7 @@ class BiAttention(nn.Module):
        :param torch.Tensor premise_mask: [batch_size, a_seq_len]
        :param torch.Tensor hypothesis_batch: [batch_size, b_seq_len, hidden_size]
        :param torch.Tensor hypothesis_mask: [batch_size, b_seq_len]
        :return: torch.Tensor attended_premises: [batch_size, a_seq_len, hidden_size]
        torch.Tensor attended_hypotheses: [batch_size, b_seq_len, hidden_size]
        :return: torch.Tensor attended_premises: [batch_size, a_seq_len, hidden_size] torch.Tensor attended_hypotheses: [batch_size, b_seq_len, hidden_size]
        """
        similarity_matrix = premise_batch.bmm(hypothesis_batch.transpose(2, 1)
                                              .contiguous())
--- a/tutorials/quickstart.ipynb
+++ b/tutorials/quickstart.ipynb
@@ -1,280 +0,0 @@
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# 快速入门"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'raw_sentence': A series of escapades demonstrating the adage that what is good for the goose is also good for the gander , some of which occasionally amuses but none of which amounts to much of a story . type=str,\n",
       "'label': 1 type=str}"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from fastNLP.io import CSVLoader\n",
    "\n",
    "loader = CSVLoader(headers=('raw_sentence', 'label'), sep='\\t')\n",
    "dataset = loader.load(\"./sample_data/tutorial_sample_dataset.csv\")\n",
    "dataset[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'raw_sentence': A series of escapades demonstrating the adage that what is good for the goose is also good for the gander , some of which occasionally amuses but none of which amounts to much of a story . type=str,\n",
       "'label': 1 type=str,\n",
       "'sentence': a series of escapades demonstrating the adage that what is good for the goose is also good for the gander , some of which occasionally amuses but none of which amounts to much of a story . type=str,\n",
       "'words': ['a', 'series', 'of', 'escapades', 'demonstrating', 'the', 'adage', 'that', 'what', 'is', 'good', 'for', 'the', 'goose', 'is', 'also', 'good', 'for', 'the', 'gander', ',', 'some', 'of', 'which', 'occasionally', 'amuses', 'but', 'none', 'of', 'which', 'amounts', 'to', 'much', 'of', 'a', 'story', '.'] type=list}"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 将所有字母转为小写, 并将所有句子变成单词序列\n",
    "dataset.apply(lambda x: x['raw_sentence'].lower(), new_field_name='sentence')\n",
    "dataset.apply(lambda x: x['sentence'].split(), new_field_name='words', is_input=True)\n",
    "dataset[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'raw_sentence': A series of escapades demonstrating the adage that what is good for the goose is also good for the gander , some of which occasionally amuses but none of which amounts to much of a story . type=str,\n",
       "'label': 1 type=str,\n",
       "'sentence': a series of escapades demonstrating the adage that what is good for the goose is also good for the gander , some of which occasionally amuses but none of which amounts to much of a story . type=str,\n",
       "'words': [4, 1, 6, 1, 1, 2, 1, 11, 153, 10, 28, 17, 2, 1, 10, 1, 28, 17, 2, 1, 5, 154, 6, 149, 1, 1, 23, 1, 6, 149, 1, 8, 30, 6, 4, 35, 3] type=list}"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from fastNLP import Vocabulary\n",
    "\n",
    "# 使用Vocabulary类统计单词，并将单词序列转化为数字序列\n",
    "vocab = Vocabulary(min_freq=2).from_dataset(dataset, field_name='words')\n",
    "vocab.index_dataset(dataset, field_name='words',new_field_name='words')\n",
    "dataset[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'raw_sentence': A series of escapades demonstrating the adage that what is good for the goose is also good for the gander , some of which occasionally amuses but none of which amounts to much of a story . type=str,\n",
       "'label': 1 type=str,\n",
       "'sentence': a series of escapades demonstrating the adage that what is good for the goose is also good for the gander , some of which occasionally amuses but none of which amounts to much of a story . type=str,\n",
       "'words': [4, 1, 6, 1, 1, 2, 1, 11, 153, 10, 28, 17, 2, 1, 10, 1, 28, 17, 2, 1, 5, 154, 6, 149, 1, 1, 23, 1, 6, 149, 1, 8, 30, 6, 4, 35, 3] type=list,\n",
       "'target': 1 type=int}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 将label转为整数，并设置为 target\n",
    "dataset.apply(lambda x: int(x['label']), new_field_name='target', is_target=True)\n",
    "dataset[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "CNNText(\n",
       "  (embed): Embedding(\n",
       "    177, 50\n",
       "    (dropout): Dropout(p=0.0)\n",
       "  )\n",
       "  (conv_pool): ConvMaxpool(\n",
       "    (convs): ModuleList(\n",
       "      (0): Conv1d(50, 3, kernel_size=(3,), stride=(1,), padding=(2,))\n",
       "      (1): Conv1d(50, 4, kernel_size=(4,), stride=(1,), padding=(2,))\n",
       "      (2): Conv1d(50, 5, kernel_size=(5,), stride=(1,), padding=(2,))\n",
       "    )\n",
       "  )\n",
       "  (dropout): Dropout(p=0.1)\n",
       "  (fc): Linear(in_features=12, out_features=5, bias=True)\n",
       ")"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from fastNLP.models import CNNText\n",
    "model = CNNText((len(vocab),50), num_classes=5, padding=2, dropout=0.1)\n",
    "model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(62, 15)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 分割训练集/验证集\n",
    "train_data, dev_data = dataset.split(0.2)\n",
    "len(train_data), len(dev_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "input fields after batch(if batch size is 2):\n",
      "\twords: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2, 26]) \n",
      "target fields after batch(if batch size is 2):\n",
      "\ttarget: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
      "\n",
      "training epochs started 2019-05-09-10-59-39\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, layout=Layout(flex='2'), max=20), HTML(value='')), layout=Layout(display='…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Evaluation at Epoch 1/10. Step:2/20. AccuracyMetric: acc=0.333333\n",
      "\n",
      "Evaluation at Epoch 2/10. Step:4/20. AccuracyMetric: acc=0.533333\n",
      "\n",
      "Evaluation at Epoch 3/10. Step:6/20. AccuracyMetric: acc=0.533333\n",
      "\n",
      "Evaluation at Epoch 4/10. Step:8/20. AccuracyMetric: acc=0.533333\n",
      "\n",
      "Evaluation at Epoch 5/10. Step:10/20. AccuracyMetric: acc=0.6\n",
      "\n",
      "Evaluation at Epoch 6/10. Step:12/20. AccuracyMetric: acc=0.8\n",
      "\n",
      "Evaluation at Epoch 7/10. Step:14/20. AccuracyMetric: acc=0.8\n",
      "\n",
      "Evaluation at Epoch 8/10. Step:16/20. AccuracyMetric: acc=0.733333\n",
      "\n",
      "Evaluation at Epoch 9/10. Step:18/20. AccuracyMetric: acc=0.733333\n",
      "\n",
      "Evaluation at Epoch 10/10. Step:20/20. AccuracyMetric: acc=0.733333\n",
      "\n",
      "\n",
      "In Epoch:6/Step:12, got best dev performance:AccuracyMetric: acc=0.8\n",
      "Reloaded the best model.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'best_eval': {'AccuracyMetric': {'acc': 0.8}},\n",
       " 'best_epoch': 6,\n",
       " 'best_step': 12,\n",
       " 'seconds': 0.22}"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from fastNLP import Trainer, CrossEntropyLoss, AccuracyMetric\n",
    "\n",
    "# 定义trainer并进行训练\n",
    "trainer = Trainer(model=model, train_data=train_data, dev_data=dev_data,\n",
    "                  loss=CrossEntropyLoss(), metrics=AccuracyMetric())\n",
    "trainer.train()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
 }
--- a/tutorials/sample_data/tutorial_sample_dataset.csv
+++ b/tutorials/sample_data/tutorial_sample_dataset.csv
@@ -1,77 +0,0 @@
 A series of escapades demonstrating the adage that what is good for the goose is also good for the gander , some of which occasionally amuses but none of which amounts to much of a story .	1
 This quiet , introspective and entertaining independent is worth seeking .	4
 Even fans of Ismail Merchant 's work , I suspect , would have a hard time sitting through this one .	1
 A positively thrilling combination of ethnography and all the intrigue , betrayal , deceit and murder of a Shakespearean tragedy or a juicy soap opera .	3
 Aggressive self-glorification and a manipulative whitewash .	1
 A comedy-drama of nearly epic proportions rooted in a sincere performance by the title character undergoing midlife crisis .	4
 Narratively , Trouble Every Day is a plodding mess .	1
 The Importance of Being Earnest , so thick with wit it plays like a reading from Bartlett 's Familiar Quotations	3
 But it does n't leave you with much .	1
 You could hate it for the same reason .	1
 There 's little to recommend Snow Dogs , unless one considers cliched dialogue and perverse escapism a source of high hilarity .	1
 Kung Pow is Oedekerk 's realization of his childhood dream to be in a martial-arts flick , and proves that sometimes the dreams of youth should remain just that .	1
 The performances are an absolute joy .	4
 Fresnadillo has something serious to say about the ways in which extravagant chance can distort our perspective and throw us off the path of good sense .	3
 I still like Moonlight Mile , better judgment be damned .	3
 A welcome relief from baseball movies that try too hard to be mythic , this one is a sweet and modest and ultimately winning story .	3
 a bilingual charmer , just like the woman who inspired it	3
 Like a less dizzily gorgeous companion to Mr. Wong 's In the Mood for Love -- very much a Hong Kong movie despite its mainland setting .	2
 As inept as big-screen remakes of The Avengers and The Wild Wild West .	1
 It 's everything you 'd expect -- but nothing more .	2
 Best indie of the year , so far .	4
 Hatfield and Hicks make the oddest of couples , and in this sense the movie becomes a study of the gambles of the publishing world , offering a case study that exists apart from all the movie 's political ramifications .	3
 It 's like going to a house party and watching the host defend himself against a frothing ex-girlfriend .	1
 That the Chuck Norris `` grenade gag '' occurs about 7 times during Windtalkers is a good indication of how serious-minded the film is .	2
 The plot is romantic comedy boilerplate from start to finish .	2
 It arrives with an impeccable pedigree , mongrel pep , and almost indecipherable plot complications .	2
 A film that clearly means to preach exclusively to the converted .	2
 While The Importance of Being Earnest offers opportunities for occasional smiles and chuckles , it does n't give us a reason to be in the theater beyond Wilde 's wit and the actors ' performances .	1
 The latest vapid actor 's exercise to appropriate the structure of Arthur Schnitzler 's Reigen .	1
 More vaudeville show than well-constructed narrative , but on those terms it 's inoffensive and actually rather sweet .	2
 Nothing more than a run-of-the-mill action flick .	2
 Hampered -- no , paralyzed -- by a self-indulgent script ... that aims for poetry and ends up sounding like satire .	0
 Ice Age is the first computer-generated feature cartoon to feel like other movies , and that makes for some glacial pacing early on .	2
 There 's very little sense to what 's going on here , but the makers serve up the cliches with considerable dash .	2
 Cattaneo should have followed the runaway success of his first film , The Full Monty , with something different .	2
 They 're the unnamed , easily substitutable forces that serve as whatever terror the heroes of horror movies try to avoid .	1
 It almost feels as if the movie is more interested in entertaining itself than in amusing us .	1
 The movie 's progression into rambling incoherence gives new meaning to the phrase ` fatal script error . '	0
 I still like Moonlight Mile , better judgment be damned .	3
 A welcome relief from baseball movies that try too hard to be mythic , this one is a sweet and modest and ultimately winning story .	3
 a bilingual charmer , just like the woman who inspired it	3
 Like a less dizzily gorgeous companion to Mr. Wong 's In the Mood for Love -- very much a Hong Kong movie despite its mainland setting .	2
 As inept as big-screen remakes of The Avengers and The Wild Wild West .	1
 It 's everything you 'd expect -- but nothing more .	2
 Best indie of the year , so far .	4
 Hatfield and Hicks make the oddest of couples , and in this sense the movie becomes a study of the gambles of the publishing world , offering a case study that exists apart from all the movie 's political ramifications .	3
 It 's like going to a house party and watching the host defend himself against a frothing ex-girlfriend .	1
 That the Chuck Norris `` grenade gag '' occurs about 7 times during Windtalkers is a good indication of how serious-minded the film is .	2
 The plot is romantic comedy boilerplate from start to finish .	2
 It arrives with an impeccable pedigree , mongrel pep , and almost indecipherable plot complications .	2
 A film that clearly means to preach exclusively to the converted .	2
 I still like Moonlight Mile , better judgment be damned .	3
 A welcome relief from baseball movies that try too hard to be mythic , this one is a sweet and modest and ultimately winning story .	3
 a bilingual charmer , just like the woman who inspired it	3
 Like a less dizzily gorgeous companion to Mr. Wong 's In the Mood for Love -- very much a Hong Kong movie despite its mainland setting .	2
 As inept as big-screen remakes of The Avengers and The Wild Wild West .	1
 It 's everything you 'd expect -- but nothing more .	2
 Best indie of the year , so far .	4
 Hatfield and Hicks make the oddest of couples , and in this sense the movie becomes a study of the gambles of the publishing world , offering a case study that exists apart from all the movie 's political ramifications .	3
 It 's like going to a house party and watching the host defend himself against a frothing ex-girlfriend .	1
 That the Chuck Norris `` grenade gag '' occurs about 7 times during Windtalkers is a good indication of how serious-minded the film is .	2
 The plot is romantic comedy boilerplate from start to finish .	2
 It arrives with an impeccable pedigree , mongrel pep , and almost indecipherable plot complications .	2
 A film that clearly means to preach exclusively to the converted .	2
 I still like Moonlight Mile , better judgment be damned .	3
 A welcome relief from baseball movies that try too hard to be mythic , this one is a sweet and modest and ultimately winning story .	3
 a bilingual charmer , just like the woman who inspired it	3
 Like a less dizzily gorgeous companion to Mr. Wong 's In the Mood for Love -- very much a Hong Kong movie despite its mainland setting .	2
 As inept as big-screen remakes of The Avengers and The Wild Wild West .	1
 It 's everything you 'd expect -- but nothing more .	2
 Best indie of the year , so far .	4
 Hatfield and Hicks make the oddest of couples , and in this sense the movie becomes a study of the gambles of the publishing world , offering a case study that exists apart from all the movie 's political ramifications .	3
 It 's like going to a house party and watching the host defend himself against a frothing ex-girlfriend .	1
 That the Chuck Norris `` grenade gag '' occurs about 7 times during Windtalkers is a good indication of how serious-minded the film is .	2
 The plot is romantic comedy boilerplate from start to finish .	2
 It arrives with an impeccable pedigree , mongrel pep , and almost indecipherable plot complications .	2
 A film that clearly means to preach exclusively to the converted .	2
--- a/tutorials/tutorial_1.ipynb
+++ b/tutorials/tutorial_1.ipynb
@@ -1,831 +0,0 @@
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# 详细指南"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 数据读入"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'raw_sentence': A series of escapades demonstrating the adage that what is good for the goose is also good for the gander , some of which occasionally amuses but none of which amounts to much of a story . type=str,\n",
       "'label': 1 type=str}"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from fastNLP.io import CSVLoader\n",
    "\n",
    "loader = CSVLoader(headers=('raw_sentence', 'label'), sep='\\t')\n",
    "dataset = loader.load(\"./sample_data/tutorial_sample_dataset.csv\")\n",
    "dataset[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Instance表示一个样本，由一个或多个field（域，属性，特征）组成，每个field有名字和值。\n",
    "\n",
    "在初始化Instance时即可定义它包含的域，使用 \"field_name=field_value\"的写法。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'raw_sentence': fake data type=str,\n",
       "'label': 0 type=str}"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from fastNLP import Instance\n",
    "\n",
    "dataset.append(Instance(raw_sentence='fake data', label='0'))\n",
    "dataset[-1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 数据处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'raw_sentence': A series of escapades demonstrating the adage that what is good for the goose is also good for the gander , some of which occasionally amuses but none of which amounts to much of a story . type=str,\n",
       "'label': 1 type=str,\n",
       "'sentence': a series of escapades demonstrating the adage that what is good for the goose is also good for the gander , some of which occasionally amuses but none of which amounts to much of a story . type=str,\n",
       "'words': [4, 1, 6, 1, 1, 2, 1, 11, 153, 10, 28, 17, 2, 1, 10, 1, 28, 17, 2, 1, 5, 154, 6, 149, 1, 1, 23, 1, 6, 149, 1, 8, 30, 6, 4, 35, 3] type=list,\n",
       "'target': 1 type=int}"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from fastNLP import Vocabulary\n",
    "\n",
    "# 将所有字母转为小写, 并将所有句子变成单词序列\n",
    "dataset.apply(lambda x: x['raw_sentence'].lower(), new_field_name='sentence')\n",
    "dataset.apply_field(lambda x: x.split(), field_name='sentence', new_field_name='words')\n",
    "\n",
    "# 使用Vocabulary类统计单词，并将单词序列转化为数字序列\n",
    "vocab = Vocabulary(min_freq=2).from_dataset(dataset, field_name='words')\n",
    "vocab.index_dataset(dataset, field_name='words',new_field_name='words')\n",
    "\n",
    "# 将label转为整数\n",
    "dataset.apply(lambda x: int(x['label']), new_field_name='target')\n",
    "dataset[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'raw_sentence': A series of escapades demonstrating the adage that what is good for the goose is also good for the gander , some of which occasionally amuses but none of which amounts to much of a story . type=str,\n",
      "'label': 1 type=str,\n",
      "'sentence': a series of escapades demonstrating the adage that what is good for the goose is also good for the gander , some of which occasionally amuses but none of which amounts to much of a story . type=str,\n",
      "'words': [4, 1, 6, 1, 1, 2, 1, 11, 153, 10, 28, 17, 2, 1, 10, 1, 28, 17, 2, 1, 5, 154, 6, 149, 1, 1, 23, 1, 6, 149, 1, 8, 30, 6, 4, 35, 3] type=list,\n",
      "'target': 1 type=int,\n",
      "'seq_len': 37 type=int}\n"
     ]
    }
   ],
   "source": [
    "# 增加长度信息\n",
    "dataset.apply_field(lambda x: len(x), field_name='words', new_field_name='seq_len')\n",
    "print(dataset[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 使用内置模块CNNText\n",
    "设置为符合内置模块的名称"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "CNNText(\n",
       "  (embed): Embedding(\n",
       "    177, 50\n",
       "    (dropout): Dropout(p=0.0)\n",
       "  )\n",
       "  (conv_pool): ConvMaxpool(\n",
       "    (convs): ModuleList(\n",
       "      (0): Conv1d(50, 3, kernel_size=(3,), stride=(1,), padding=(2,))\n",
       "      (1): Conv1d(50, 4, kernel_size=(4,), stride=(1,), padding=(2,))\n",
       "      (2): Conv1d(50, 5, kernel_size=(5,), stride=(1,), padding=(2,))\n",
       "    )\n",
       "  )\n",
       "  (dropout): Dropout(p=0.1)\n",
       "  (fc): Linear(in_features=12, out_features=5, bias=True)\n",
       ")"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from fastNLP.models import CNNText\n",
    "\n",
    "model_cnn = CNNText((len(vocab),50), num_classes=5, padding=2, dropout=0.1)\n",
    "model_cnn"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "我们在使用内置模块的时候，还应该注意把 field 设定成符合内置模型输入输出的名字。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "words\n",
      "seq_len\n",
      "target\n"
     ]
    }
   ],
   "source": [
    "from fastNLP import Const\n",
    "\n",
    "dataset.rename_field('words', Const.INPUT)\n",
    "dataset.rename_field('seq_len', Const.INPUT_LEN)\n",
    "dataset.rename_field('target', Const.TARGET)\n",
    "\n",
    "dataset.set_input(Const.INPUT, Const.INPUT_LEN)\n",
    "dataset.set_target(Const.TARGET)\n",
    "\n",
    "print(Const.INPUT)\n",
    "print(Const.INPUT_LEN)\n",
    "print(Const.TARGET)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 分割训练集/验证集/测试集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(64, 7, 7)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_dev_data, test_data = dataset.split(0.1)\n",
    "train_data, dev_data = train_dev_data.split(0.1)\n",
    "len(train_data), len(dev_data), len(test_data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 训练(model_cnn)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### loss\n",
    "训练模型需要提供一个损失函数\n",
    "\n",
    "下面提供了一个在分类问题中常用的交叉熵损失。注意它的**初始化参数**。\n",
    "\n",
    "pred参数对应的是模型的forward返回的dict的一个key的名字，这里是\"output\"。\n",
    "\n",
    "target参数对应的是dataset作为标签的field的名字，这里是\"target\"。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "from fastNLP import CrossEntropyLoss\n",
    "\n",
    "# loss = CrossEntropyLoss()\n",
    "# 等价于\n",
    "loss = CrossEntropyLoss(pred=Const.OUTPUT, target=Const.TARGET)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Metric\n",
    "定义评价指标\n",
    "\n",
    "这里使用准确率。参数的“命名规则”跟上面类似。\n",
    "\n",
    "pred参数对应的是模型的predict方法返回的dict的一个key的名字，这里是\"predict\"。\n",
    "\n",
    "target参数对应的是dataset作为标签的field的名字，这里是\"target\"。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "from fastNLP import AccuracyMetric\n",
    "\n",
    "# metrics=AccuracyMetric()\n",
    "# 等价于\n",
    "metrics=AccuracyMetric(pred=Const.OUTPUT, target=Const.TARGET)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "input fields after batch(if batch size is 2):\n",
      "\twords: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2, 16]) \n",
      "\tseq_len: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
      "target fields after batch(if batch size is 2):\n",
      "\ttarget: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
      "\n",
      "training epochs started 2019-05-12-21-38-34\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, layout=Layout(flex='2'), max=20), HTML(value='')), layout=Layout(display='…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Evaluation at Epoch 1/10. Step:2/20. AccuracyMetric: acc=0.285714\n",
      "\n",
      "Evaluation at Epoch 2/10. Step:4/20. AccuracyMetric: acc=0.428571\n",
      "\n",
      "Evaluation at Epoch 3/10. Step:6/20. AccuracyMetric: acc=0.428571\n",
      "\n",
      "Evaluation at Epoch 4/10. Step:8/20. AccuracyMetric: acc=0.428571\n",
      "\n",
      "Evaluation at Epoch 5/10. Step:10/20. AccuracyMetric: acc=0.428571\n",
      "\n",
      "Evaluation at Epoch 6/10. Step:12/20. AccuracyMetric: acc=0.428571\n",
      "\n",
      "Evaluation at Epoch 7/10. Step:14/20. AccuracyMetric: acc=0.428571\n",
      "\n",
      "Evaluation at Epoch 8/10. Step:16/20. AccuracyMetric: acc=0.857143\n",
      "\n",
      "Evaluation at Epoch 9/10. Step:18/20. AccuracyMetric: acc=0.857143\n",
      "\n",
      "Evaluation at Epoch 10/10. Step:20/20. AccuracyMetric: acc=0.857143\n",
      "\n",
      "\n",
      "In Epoch:8/Step:16, got best dev performance:AccuracyMetric: acc=0.857143\n",
      "Reloaded the best model.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'best_eval': {'AccuracyMetric': {'acc': 0.857143}},\n",
       " 'best_epoch': 8,\n",
       " 'best_step': 16,\n",
       " 'seconds': 0.21}"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from fastNLP import Trainer\n",
    "\n",
    "trainer = Trainer(model=model_cnn, train_data=train_data, dev_data=dev_data, loss=loss, metrics=metrics)\n",
    "trainer.train()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 测试(model_cnn)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[tester] \n",
      "AccuracyMetric: acc=0.857143\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'AccuracyMetric': {'acc': 0.857143}}"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from fastNLP import Tester\n",
    "\n",
    "tester = Tester(test_data, model_cnn, metrics=AccuracyMetric())\n",
    "tester.test()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 编写自己的模型\n",
    "\n",
    "完全支持 pytorch 的模型，与 pytorch 唯一不同的是返回结果是一个字典，字典中至少需要包含 \"pred\" 这个字段"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "\n",
    "class LSTMText(nn.Module):\n",
    "    def __init__(self, vocab_size, embedding_dim, output_dim, hidden_dim=64, num_layers=2, dropout=0.5):\n",
    "        super().__init__()\n",
    "\n",
    "        self.embedding = nn.Embedding(vocab_size, embedding_dim)\n",
    "        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=num_layers, bidirectional=True, dropout=dropout)\n",
    "        self.fc = nn.Linear(hidden_dim * 2, output_dim)\n",
    "        self.dropout = nn.Dropout(dropout)\n",
    "\n",
    "    def forward(self, words):\n",
    "        # (input) words : (batch_size, seq_len)\n",
    "        words = words.permute(1,0)\n",
    "        # words : (seq_len, batch_size)\n",
    "\n",
    "        embedded = self.dropout(self.embedding(words))\n",
    "        # embedded : (seq_len, batch_size, embedding_dim)\n",
    "        output, (hidden, cell) = self.lstm(embedded)\n",
    "        # output: (seq_len, batch_size, hidden_dim * 2)\n",
    "        # hidden: (num_layers * 2, batch_size, hidden_dim)\n",
    "        # cell: (num_layers * 2, batch_size, hidden_dim)\n",
    "\n",
    "        hidden = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)\n",
    "        hidden = self.dropout(hidden)\n",
    "        # hidden: (batch_size, hidden_dim * 2)\n",
    "\n",
    "        pred = self.fc(hidden.squeeze(0))\n",
    "        # result: (batch_size, output_dim)\n",
    "        return {\"pred\":pred}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "input fields after batch(if batch size is 2):\n",
      "\twords: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2, 16]) \n",
      "\tseq_len: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
      "target fields after batch(if batch size is 2):\n",
      "\ttarget: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
      "\n",
      "training epochs started 2019-05-12-21-38-36\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, layout=Layout(flex='2'), max=20), HTML(value='')), layout=Layout(display='…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Evaluation at Epoch 1/10. Step:2/20. AccuracyMetric: acc=0.571429\n",
      "\n",
      "Evaluation at Epoch 2/10. Step:4/20. AccuracyMetric: acc=0.571429\n",
      "\n",
      "Evaluation at Epoch 3/10. Step:6/20. AccuracyMetric: acc=0.571429\n",
      "\n",
      "Evaluation at Epoch 4/10. Step:8/20. AccuracyMetric: acc=0.571429\n",
      "\n",
      "Evaluation at Epoch 5/10. Step:10/20. AccuracyMetric: acc=0.714286\n",
      "\n",
      "Evaluation at Epoch 6/10. Step:12/20. AccuracyMetric: acc=0.857143\n",
      "\n",
      "Evaluation at Epoch 7/10. Step:14/20. AccuracyMetric: acc=0.857143\n",
      "\n",
      "Evaluation at Epoch 8/10. Step:16/20. AccuracyMetric: acc=0.857143\n",
      "\n",
      "Evaluation at Epoch 9/10. Step:18/20. AccuracyMetric: acc=0.857143\n",
      "\n",
      "Evaluation at Epoch 10/10. Step:20/20. AccuracyMetric: acc=0.857143\n",
      "\n",
      "\n",
      "In Epoch:6/Step:12, got best dev performance:AccuracyMetric: acc=0.857143\n",
      "Reloaded the best model.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'best_eval': {'AccuracyMetric': {'acc': 0.857143}},\n",
       " 'best_epoch': 6,\n",
       " 'best_step': 12,\n",
       " 'seconds': 2.15}"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_lstm = LSTMText(len(vocab),50,5)\n",
    "trainer = Trainer(model=model_lstm, train_data=train_data, dev_data=dev_data, loss=loss, metrics=metrics)\n",
    "trainer.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[tester] \n",
      "AccuracyMetric: acc=0.857143\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'AccuracyMetric': {'acc': 0.857143}}"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tester = Tester(test_data, model_lstm, metrics=AccuracyMetric())\n",
    "tester.test()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 使用 Batch 编写自己的训练过程"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 0 Avg Loss: 3.11 18ms\n",
      "Epoch 1 Avg Loss: 2.88 30ms\n",
      "Epoch 2 Avg Loss: 2.69 42ms\n",
      "Epoch 3 Avg Loss: 2.47 54ms\n",
      "Epoch 4 Avg Loss: 2.38 67ms\n",
      "Epoch 5 Avg Loss: 2.10 78ms\n",
      "Epoch 6 Avg Loss: 2.06 91ms\n",
      "Epoch 7 Avg Loss: 1.92 103ms\n",
      "Epoch 8 Avg Loss: 1.91 114ms\n",
      "Epoch 9 Avg Loss: 1.76 126ms\n",
      "[tester] \n",
      "AccuracyMetric: acc=0.571429\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'AccuracyMetric': {'acc': 0.571429}}"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from fastNLP import BucketSampler\n",
    "from fastNLP import Batch\n",
    "import torch\n",
    "import time\n",
    "\n",
    "model = CNNText((len(vocab),50), num_classes=5, padding=2, dropout=0.1)\n",
    "\n",
    "def train(epoch, data):\n",
    "    optim = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "    lossfunc = torch.nn.CrossEntropyLoss()\n",
    "    batch_size = 32\n",
    "\n",
    "    # 定义一个Batch，传入DataSet，规定batch_size和取batch的规则。\n",
    "    # 顺序（Sequential），随机（Random），相似长度组成一个batch（Bucket）\n",
    "    train_sampler = BucketSampler(batch_size=batch_size, seq_len_field_name='seq_len')\n",
    "    train_batch = Batch(batch_size=batch_size, dataset=data, sampler=train_sampler)\n",
    "    \n",
    "    start_time = time.time()\n",
    "    for i in range(epoch):\n",
    "        loss_list = []\n",
    "        for batch_x, batch_y in train_batch:\n",
    "            optim.zero_grad()\n",
    "            output = model(batch_x['words'])\n",
    "            loss = lossfunc(output['pred'], batch_y['target'])\n",
    "            loss.backward()\n",
    "            optim.step()\n",
    "            loss_list.append(loss.item())\n",
    "        print('Epoch {:d} Avg Loss: {:.2f}'.format(i, sum(loss_list) / len(loss_list)),end=\" \")\n",
    "        print('{:d}ms'.format(round((time.time()-start_time)*1000)))\n",
    "        loss_list.clear()\n",
    "            \n",
    "train(10, train_data)\n",
    "tester = Tester(test_data, model, metrics=AccuracyMetric())\n",
    "tester.test()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 使用 Callback 实现自己想要的效果"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "input fields after batch(if batch size is 2):\n",
      "\twords: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2, 16]) \n",
      "\tseq_len: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
      "target fields after batch(if batch size is 2):\n",
      "\ttarget: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
      "\n",
      "training epochs started 2019-05-12-21-38-40\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, layout=Layout(flex='2'), max=20), HTML(value='')), layout=Layout(display='…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Evaluation at Epoch 1/10. Step:2/20. AccuracyMetric: acc=0.285714\n",
      "\n",
      "Sum Time: 51ms\n",
      "\n",
      "\n",
      "Evaluation at Epoch 2/10. Step:4/20. AccuracyMetric: acc=0.285714\n",
      "\n",
      "Sum Time: 69ms\n",
      "\n",
      "\n",
      "Evaluation at Epoch 3/10. Step:6/20. AccuracyMetric: acc=0.285714\n",
      "\n",
      "Sum Time: 91ms\n",
      "\n",
      "\n",
      "Evaluation at Epoch 4/10. Step:8/20. AccuracyMetric: acc=0.571429\n",
      "\n",
      "Sum Time: 107ms\n",
      "\n",
      "\n",
      "Evaluation at Epoch 5/10. Step:10/20. AccuracyMetric: acc=0.571429\n",
      "\n",
      "Sum Time: 125ms\n",
      "\n",
      "\n",
      "Evaluation at Epoch 6/10. Step:12/20. AccuracyMetric: acc=0.571429\n",
      "\n",
      "Sum Time: 142ms\n",
      "\n",
      "\n",
      "Evaluation at Epoch 7/10. Step:14/20. AccuracyMetric: acc=0.571429\n",
      "\n",
      "Sum Time: 158ms\n",
      "\n",
      "\n",
      "Evaluation at Epoch 8/10. Step:16/20. AccuracyMetric: acc=0.571429\n",
      "\n",
      "Sum Time: 176ms\n",
      "\n",
      "\n",
      "Evaluation at Epoch 9/10. Step:18/20. AccuracyMetric: acc=0.714286\n",
      "\n",
      "Sum Time: 193ms\n",
      "\n",
      "\n",
      "Evaluation at Epoch 10/10. Step:20/20. AccuracyMetric: acc=0.857143\n",
      "\n",
      "Sum Time: 212ms\n",
      "\n",
      "\n",
      "\n",
      "In Epoch:10/Step:20, got best dev performance:AccuracyMetric: acc=0.857143\n",
      "Reloaded the best model.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'best_eval': {'AccuracyMetric': {'acc': 0.857143}},\n",
       " 'best_epoch': 10,\n",
       " 'best_step': 20,\n",
       " 'seconds': 0.2}"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from fastNLP import Callback\n",
    "\n",
    "start_time = time.time()\n",
    "\n",
    "class MyCallback(Callback):\n",
    "    def on_epoch_end(self):\n",
    "        print('Sum Time: {:d}ms\\n\\n'.format(round((time.time()-start_time)*1000)))\n",
    "        \n",
    "\n",
    "model = CNNText((len(vocab),50), num_classes=5, padding=2, dropout=0.1)\n",
    "trainer = Trainer(model=model, train_data=train_data, dev_data=dev_data,\n",
    "                  loss=CrossEntropyLoss(), metrics=AccuracyMetric(), callbacks=[MyCallback()])\n",
    "trainer.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
 }
--- a/tutorials/tutorial_10_callback.ipynb
+++ b/tutorials/tutorial_10_callback.ipynb
--- a/tutorials/命名实体识别.ipynb
+++ b/tutorials/命名实体识别.ipynb
@@ -1,41 +0,0 @@
 {
 "cells": [
  {
   "cell_type": "raw",
   "metadata": {},
   "source": [
    "##1. 命名实体识别(named entity recognition, NER)\n",
    "命名实体识别任务是从文本中抽取出具有特殊意义或者指代性非常强的实体，通常包括人名、地名、机构名和时间等。\n",
    "如下面的例子中\n",
    "\n",
    "我来自复旦大学。\n",
    "\n",
    "其中“复旦大学”就是一个机构名，命名实体识别就是要从中识别出“复旦大学”这四个字是一个整体，且属于机构名这个类别。这个问题现在一般被转换为了序列标注问题。\n",
    "在本tutorial中我们将通过fastNLP尝试写出一个命名实体识别模型。\n",
    "\n",
    "##2. 数据\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }