@@ -14,7 +14,7 @@ help: | |||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) | |||
apidoc: | |||
$(SPHINXAPIDOC) -fM -o source ../$(SPHINXPROJ) | |||
$(SPHINXAPIDOC) -efM -o source ../$(SPHINXPROJ) | |||
server: | |||
cd build/html && python -m http.server | |||
@@ -68,7 +68,7 @@ master_doc = 'index' | |||
# | |||
# This is also used if you do content translation via gettext catalogs. | |||
# Usually you set "language" from the command line for these cases. | |||
language = None | |||
language = "zh_CN" | |||
# List of patterns, relative to source directory, that match files and | |||
# directories to ignore when looking for source files. | |||
@@ -89,7 +89,10 @@ html_theme = 'sphinx_rtd_theme' | |||
# further. For a list of options available for each theme, see the | |||
# documentation. | |||
# | |||
# html_theme_options = {} | |||
html_theme_options = { | |||
'collapse_navigation': False, | |||
'titles_only': True | |||
} | |||
# Add any paths that contain custom static files (such as style sheets) here, | |||
# relative to this directory. They are copied after the builtin static files, | |||
@@ -1,60 +0,0 @@ | |||
fastNLP.api package | |||
=================== | |||
.. automodule:: fastNLP.api | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
Submodules | |||
---------- | |||
fastNLP.api.api module | |||
---------------------- | |||
.. automodule:: fastNLP.api.api | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.api.converter module | |||
---------------------------- | |||
.. automodule:: fastNLP.api.converter | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.api.examples module | |||
--------------------------- | |||
.. automodule:: fastNLP.api.examples | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.api.pipeline module | |||
--------------------------- | |||
.. automodule:: fastNLP.api.pipeline | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.api.processor module | |||
---------------------------- | |||
.. automodule:: fastNLP.api.processor | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.api.utils module | |||
------------------------ | |||
.. automodule:: fastNLP.api.utils | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
@@ -1,44 +0,0 @@ | |||
fastNLP.automl package | |||
====================== | |||
.. automodule:: fastNLP.automl | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
Submodules | |||
---------- | |||
fastNLP.automl.enas\_controller module | |||
-------------------------------------- | |||
.. automodule:: fastNLP.automl.enas_controller | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.automl.enas\_model module | |||
--------------------------------- | |||
.. automodule:: fastNLP.automl.enas_model | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.automl.enas\_trainer module | |||
----------------------------------- | |||
.. automodule:: fastNLP.automl.enas_trainer | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.automl.enas\_utils module | |||
--------------------------------- | |||
.. automodule:: fastNLP.automl.enas_utils | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
@@ -1,20 +0,0 @@ | |||
fastNLP.component package | |||
========================= | |||
.. automodule:: fastNLP.component | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
Submodules | |||
---------- | |||
fastNLP.component.bert\_tokenizer module | |||
---------------------------------------- | |||
.. automodule:: fastNLP.component.bert_tokenizer | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
@@ -1,124 +0,0 @@ | |||
fastNLP.core package | |||
==================== | |||
.. automodule:: fastNLP.core | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
Submodules | |||
---------- | |||
fastNLP.core.batch module | |||
------------------------- | |||
.. automodule:: fastNLP.core.batch | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.core.callback module | |||
---------------------------- | |||
.. automodule:: fastNLP.core.callback | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.core.dataset module | |||
--------------------------- | |||
.. automodule:: fastNLP.core.dataset | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.core.fieldarray module | |||
------------------------------ | |||
.. automodule:: fastNLP.core.fieldarray | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.core.instance module | |||
---------------------------- | |||
.. automodule:: fastNLP.core.instance | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.core.losses module | |||
-------------------------- | |||
.. automodule:: fastNLP.core.losses | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.core.metrics module | |||
--------------------------- | |||
.. automodule:: fastNLP.core.metrics | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.core.optimizer module | |||
----------------------------- | |||
.. automodule:: fastNLP.core.optimizer | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.core.predictor module | |||
----------------------------- | |||
.. automodule:: fastNLP.core.predictor | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.core.sampler module | |||
--------------------------- | |||
.. automodule:: fastNLP.core.sampler | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.core.tester module | |||
-------------------------- | |||
.. automodule:: fastNLP.core.tester | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.core.trainer module | |||
--------------------------- | |||
.. automodule:: fastNLP.core.trainer | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.core.utils module | |||
------------------------- | |||
.. automodule:: fastNLP.core.utils | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.core.vocabulary module | |||
------------------------------ | |||
.. automodule:: fastNLP.core.vocabulary | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
@@ -1,60 +0,0 @@ | |||
fastNLP.io package | |||
================== | |||
.. automodule:: fastNLP.io | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
Submodules | |||
---------- | |||
fastNLP.io.base\_loader module | |||
------------------------------ | |||
.. automodule:: fastNLP.io.base_loader | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.io.config\_io module | |||
---------------------------- | |||
.. automodule:: fastNLP.io.config_io | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.io.dataset\_loader module | |||
--------------------------------- | |||
.. automodule:: fastNLP.io.dataset_loader | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.io.embed\_loader module | |||
------------------------------- | |||
.. automodule:: fastNLP.io.embed_loader | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.io.file\_reader module | |||
------------------------------ | |||
.. automodule:: fastNLP.io.file_reader | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.io.model\_io module | |||
--------------------------- | |||
.. automodule:: fastNLP.io.model_io | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
@@ -1,108 +0,0 @@ | |||
fastNLP.models package | |||
====================== | |||
.. automodule:: fastNLP.models | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
Submodules | |||
---------- | |||
fastNLP.models.base\_model module | |||
--------------------------------- | |||
.. automodule:: fastNLP.models.base_model | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.models.bert module | |||
-------------------------- | |||
.. automodule:: fastNLP.models.bert | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.models.biaffine\_parser module | |||
-------------------------------------- | |||
.. automodule:: fastNLP.models.biaffine_parser | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.models.char\_language\_model module | |||
------------------------------------------- | |||
.. automodule:: fastNLP.models.char_language_model | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.models.cnn\_text\_classification module | |||
----------------------------------------------- | |||
.. automodule:: fastNLP.models.cnn_text_classification | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.models.enas\_controller module | |||
-------------------------------------- | |||
.. automodule:: fastNLP.models.enas_controller | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.models.enas\_model module | |||
--------------------------------- | |||
.. automodule:: fastNLP.models.enas_model | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.models.enas\_trainer module | |||
----------------------------------- | |||
.. automodule:: fastNLP.models.enas_trainer | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.models.enas\_utils module | |||
--------------------------------- | |||
.. automodule:: fastNLP.models.enas_utils | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.models.sequence\_modeling module | |||
---------------------------------------- | |||
.. automodule:: fastNLP.models.sequence_modeling | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.models.snli module | |||
-------------------------- | |||
.. automodule:: fastNLP.models.snli | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.models.star\_transformer module | |||
--------------------------------------- | |||
.. automodule:: fastNLP.models.star_transformer | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
@@ -1,28 +0,0 @@ | |||
fastNLP.modules.aggregator package | |||
================================== | |||
.. automodule:: fastNLP.modules.aggregator | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
Submodules | |||
---------- | |||
fastNLP.modules.aggregator.attention module | |||
------------------------------------------- | |||
.. automodule:: fastNLP.modules.aggregator.attention | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.modules.aggregator.pooling module | |||
----------------------------------------- | |||
.. automodule:: fastNLP.modules.aggregator.pooling | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
@@ -1,36 +0,0 @@ | |||
fastNLP.modules.decoder package | |||
=============================== | |||
.. automodule:: fastNLP.modules.decoder | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
Submodules | |||
---------- | |||
fastNLP.modules.decoder.CRF module | |||
---------------------------------- | |||
.. automodule:: fastNLP.modules.decoder.CRF | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.modules.decoder.MLP module | |||
---------------------------------- | |||
.. automodule:: fastNLP.modules.decoder.MLP | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.modules.decoder.utils module | |||
------------------------------------ | |||
.. automodule:: fastNLP.modules.decoder.utils | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
@@ -1,100 +0,0 @@ | |||
fastNLP.modules.encoder package | |||
=============================== | |||
.. automodule:: fastNLP.modules.encoder | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
Submodules | |||
---------- | |||
fastNLP.modules.encoder.bert module | |||
----------------------------------- | |||
.. automodule:: fastNLP.modules.encoder.bert | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.modules.encoder.char\_encoder module | |||
-------------------------------------------- | |||
.. automodule:: fastNLP.modules.encoder.char_encoder | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.modules.encoder.conv module | |||
----------------------------------- | |||
.. automodule:: fastNLP.modules.encoder.conv | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.modules.encoder.conv\_maxpool module | |||
-------------------------------------------- | |||
.. automodule:: fastNLP.modules.encoder.conv_maxpool | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.modules.encoder.embedding module | |||
---------------------------------------- | |||
.. automodule:: fastNLP.modules.encoder.embedding | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.modules.encoder.linear module | |||
------------------------------------- | |||
.. automodule:: fastNLP.modules.encoder.linear | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.modules.encoder.lstm module | |||
----------------------------------- | |||
.. automodule:: fastNLP.modules.encoder.lstm | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.modules.encoder.masked\_rnn module | |||
------------------------------------------ | |||
.. automodule:: fastNLP.modules.encoder.masked_rnn | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.modules.encoder.star\_transformer module | |||
------------------------------------------------ | |||
.. automodule:: fastNLP.modules.encoder.star_transformer | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.modules.encoder.transformer module | |||
------------------------------------------ | |||
.. automodule:: fastNLP.modules.encoder.transformer | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.modules.encoder.variational\_rnn module | |||
----------------------------------------------- | |||
.. automodule:: fastNLP.modules.encoder.variational_rnn | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
@@ -1,45 +0,0 @@ | |||
fastNLP.modules package | |||
======================= | |||
.. automodule:: fastNLP.modules | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
Subpackages | |||
----------- | |||
.. toctree:: | |||
fastNLP.modules.aggregator | |||
fastNLP.modules.decoder | |||
fastNLP.modules.encoder | |||
Submodules | |||
---------- | |||
fastNLP.modules.dropout module | |||
------------------------------ | |||
.. automodule:: fastNLP.modules.dropout | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.modules.other\_modules module | |||
------------------------------------- | |||
.. automodule:: fastNLP.modules.other_modules | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
fastNLP.modules.utils module | |||
---------------------------- | |||
.. automodule:: fastNLP.modules.utils | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
@@ -1,21 +0,0 @@ | |||
fastNLP package | |||
=============== | |||
.. automodule:: fastNLP | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
Subpackages | |||
----------- | |||
.. toctree:: | |||
fastNLP.api | |||
fastNLP.automl | |||
fastNLP.component | |||
fastNLP.core | |||
fastNLP.io | |||
fastNLP.models | |||
fastNLP.modules | |||
@@ -1,5 +1,15 @@ | |||
""" | |||
fastNLP consists of submodules such as :mod:`~fastNLP.core`, :mod:`~fastNLP.io` and :mod:`~fastNLP.modules`, but the commonly used components can all be imported directly; they are listed below:
""" | |||
__all__ = ["Instance", "FieldArray", "Batch", "Vocabulary", "DataSet", | |||
"Trainer", "Tester", "Callback", | |||
"Padder", "AutoPadder", "EngChar2DPadder", | |||
"AccuracyMetric", "Optimizer", "SGD", "Adam", | |||
"Sampler", "SequentialSampler", "BucketSampler", "RandomSampler", | |||
"LossFunc", "CrossEntropyLoss", "L1Loss", "BCELoss", "NLLLoss", "LossInForward", | |||
"cache_results"] | |||
from .core import * | |||
from . import models | |||
from . import modules | |||
__version__ = '0.4.0'
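The ``__all__`` list above is what makes flat imports work; a minimal sketch of the intended usage (the field values are toy data)::

    from fastNLP import DataSet, Instance, Vocabulary

    ds = DataSet({"x": [[1, 2], [3, 4]], "y": [0, 1]})  # two toy instances
    ds.append(Instance(x=[5, 6], y=0))                  # Instance comes straight from fastNLP
    vocab = Vocabulary()                                # so do Vocabulary, Trainer, Tester, ...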
@@ -1 +1,2 @@ | |||
__all__ = ["CWS", "POS", "Parser"] | |||
from .api import CWS, POS, Parser |
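A usage sketch implied by these exports, assuming the default pretrained model can be downloaded (the input sentence is arbitrary)::

    from fastNLP.api import CWS

    cws = CWS(device='cpu')                   # model_path=None -> fetch the default model
    print(cws.predict('这是一个简单的测试。'))  # a str in, a segmented str out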
@@ -1,41 +1,3 @@ | |||
""" | |||
Introduction document for api.api
Indenting directly turns the text above into a heading
Indenting after a blank line is the more reasonable style
Quite reasonable
*This is italic content*
**This is bold content**
A math formula block
.. math::
    E = mc^2
.. note::
    A note-style admonition.
.. warning::
    A warning-style admonition.
.. seealso::
    `References and hyperlinks <https://willqvq.github.io/doc_guide/%E6%B3%A8%E9%87%8A%E6%8C%87%E5%AF%BC>`_
A plain code block needs a preceding blank line, Example::
    from fitlog import fitlog
    fitlog.commit()
Plain subscript and superscript:
H\ :sub:`2`\ O
E = mc\ :sup:`2`
""" | |||
import warnings | |||
import torch | |||
@@ -43,15 +5,14 @@ import torch | |||
warnings.filterwarnings('ignore') | |||
import os | |||
from fastNLP.core.dataset import DataSet | |||
from fastNLP.api.utils import load_url | |||
from fastNLP.api.processor import ModelProcessor | |||
from fastNLP.io.dataset_loader import _cut_long_sentence, ConllLoader | |||
from fastNLP.core.instance import Instance | |||
from fastNLP.api.pipeline import Pipeline | |||
from fastNLP.core.metrics import SpanFPreRecMetric | |||
from fastNLP.api.processor import IndexerProcessor | |||
from ..core.dataset import DataSet | |||
from .utils import load_url | |||
from .processor import ModelProcessor | |||
from ..io.dataset_loader import _cut_long_sentence, ConllLoader | |||
from ..core.instance import Instance | |||
from ..api.pipeline import Pipeline | |||
from ..core.metrics import SpanFPreRecMetric | |||
from .processor import IndexerProcessor | |||
# TODO add pretrain urls | |||
model_urls = { | |||
@@ -63,9 +24,10 @@ model_urls = { | |||
class ConllCWSReader(object): | |||
"""Deprecated. Use ConllLoader for all types of conll-format files.""" | |||
def __init__(self): | |||
pass | |||
def load(self, path, cut_long_sent=False): | |||
""" | |||
The returned DataSet contains only the raw_sentence field, whose content is str.
@@ -98,7 +60,7 @@ class ConllCWSReader(object): | |||
sample.append(line.strip().split()) | |||
if len(sample) > 0: | |||
datalist.append(sample) | |||
ds = DataSet() | |||
for sample in datalist: | |||
# print(sample) | |||
@@ -113,7 +75,7 @@ class ConllCWSReader(object): | |||
for raw_sentence in sents: | |||
ds.append(Instance(raw_sentence=raw_sentence)) | |||
return ds | |||
def get_char_lst(self, sample): | |||
if len(sample) == 0: | |||
return None | |||
@@ -125,11 +87,13 @@ class ConllCWSReader(object): | |||
text.append(t1) | |||
return text | |||
class ConllxDataLoader(ConllLoader): | |||
"""返回“词级别”的标签信息,包括词、词性、(句法)头依赖、(句法)边标签。跟``ZhConllPOSReader``完全不同。 | |||
Deprecated. Use ConllLoader for all types of conll-format files. | |||
""" | |||
def __init__(self): | |||
headers = [ | |||
'words', 'pos_tags', 'heads', 'labels', | |||
@@ -141,18 +105,15 @@ class ConllxDataLoader(ConllLoader): | |||
class API: | |||
""" | |||
Documentation for the API class
""" | |||
def __init__(self): | |||
self.pipeline = None | |||
self._dict = None | |||
def predict(self, *args, **kwargs): | |||
"""Do prediction for the given input. | |||
""" | |||
raise NotImplementedError | |||
def test(self, file_path): | |||
"""Test performance over the given data set. | |||
@@ -160,7 +121,7 @@ class API: | |||
:return: a dictionary of metric values | |||
""" | |||
raise NotImplementedError | |||
def load(self, path, device): | |||
if os.path.exists(os.path.expanduser(path)): | |||
_dict = torch.load(path, map_location='cpu') | |||
@@ -180,14 +141,14 @@ class POS(API): | |||
:param str device: device name such as "cpu" or "cuda:0". Use the same notation as PyTorch. | |||
""" | |||
def __init__(self, model_path=None, device='cpu'): | |||
super(POS, self).__init__() | |||
if model_path is None: | |||
model_path = model_urls['pos'] | |||
self.load(model_path, device) | |||
def predict(self, content): | |||
"""predict函数的介绍, | |||
函数介绍的第二句,这句话不会换行 | |||
@@ -197,48 +158,48 @@ class POS(API): | |||
""" | |||
if not hasattr(self, "pipeline"): | |||
raise ValueError("You have to load model first.") | |||
sentence_list = content | |||
# 1. check the type of each sentence
for sentence in sentence_list: | |||
if not all((type(obj) == str for obj in sentence)): | |||
raise ValueError("Input must be list of list of string.") | |||
# 2. build the dataset
dataset = DataSet() | |||
dataset.add_field("words", sentence_list) | |||
# 3. run the pipeline
self.pipeline(dataset) | |||
def merge_tag(words_list, tags_list): | |||
rtn = [] | |||
for words, tags in zip(words_list, tags_list): | |||
rtn.append([w + "/" + t for w, t in zip(words, tags)]) | |||
return rtn | |||
output = dataset.field_arrays["tag"].content | |||
if isinstance(content, str): | |||
return output[0] | |||
elif isinstance(content, list): | |||
return merge_tag(content, output) | |||
def test(self, file_path): | |||
test_data = ConllxDataLoader().load(file_path) | |||
save_dict = self._dict | |||
tag_vocab = save_dict["tag_vocab"] | |||
pipeline = save_dict["pipeline"] | |||
index_tag = IndexerProcessor(vocab=tag_vocab, field_name="tag", new_added_field_name="truth", is_input=False) | |||
pipeline.pipeline = [index_tag] + pipeline.pipeline | |||
test_data.rename_field("pos_tags", "tag") | |||
pipeline(test_data) | |||
test_data.set_target("truth") | |||
prediction = test_data.field_arrays["predict"].content | |||
truth = test_data.field_arrays["truth"].content | |||
seq_len = test_data.field_arrays["word_seq_origin_len"].content | |||
# padding by hand | |||
max_length = max([len(seq) for seq in prediction]) | |||
for idx in range(len(prediction)): | |||
@@ -252,7 +213,7 @@ class POS(API): | |||
f1 = round(test_result['f'] * 100, 2) | |||
pre = round(test_result['pre'] * 100, 2) | |||
rec = round(test_result['rec'] * 100, 2) | |||
return {"F1": f1, "precision": pre, "recall": rec} | |||
@@ -263,14 +224,15 @@ class CWS(API): | |||
:param model_path: when model_path is None, the model at the default location is used; if it does not exist there, the model is downloaded automatically
:param device: str, may be 'cpu', 'cuda', 'cuda:0', etc. The model is loaded onto that device for inference.
""" | |||
def __init__(self, model_path=None, device='cpu'): | |||
super(CWS, self).__init__() | |||
if model_path is None: | |||
model_path = model_urls['cws'] | |||
self.load(model_path, device) | |||
def predict(self, content): | |||
""" | |||
Word segmentation interface.
@@ -281,27 +243,27 @@ class CWS(API): | |||
""" | |||
if not hasattr(self, 'pipeline'): | |||
raise ValueError("You have to load model first.") | |||
sentence_list = [] | |||
# 1. check the type of each sentence
if isinstance(content, str): | |||
sentence_list.append(content) | |||
elif isinstance(content, list): | |||
sentence_list = content | |||
# 2. build the dataset
dataset = DataSet() | |||
dataset.add_field('raw_sentence', sentence_list) | |||
# 3. run the pipeline
self.pipeline(dataset) | |||
output = dataset.get_field('output').content | |||
if isinstance(content, str): | |||
return output[0] | |||
elif isinstance(content, list): | |||
return output | |||
def test(self, filepath): | |||
""" | |||
Takes the path of a segmentation file and returns the segmentation f1, precision and recall on that data set.
@@ -327,28 +289,28 @@ class CWS(API): | |||
tag_proc = self._dict['tag_proc'] | |||
cws_model = self.pipeline.pipeline[-2].model | |||
pipeline = self.pipeline.pipeline[:-2] | |||
pipeline.insert(1, tag_proc) | |||
pp = Pipeline(pipeline) | |||
reader = ConllCWSReader() | |||
# te_filename = '/home/hyan/ctb3/test.conllx' | |||
te_dataset = reader.load(filepath) | |||
pp(te_dataset) | |||
from fastNLP.core.tester import Tester | |||
from fastNLP.core.metrics import BMESF1PreRecMetric | |||
from ..core.tester import Tester | |||
from ..core.metrics import BMESF1PreRecMetric | |||
tester = Tester(data=te_dataset, model=cws_model, metrics=BMESF1PreRecMetric(target='target'), batch_size=64, | |||
verbose=0) | |||
eval_res = tester.test() | |||
f1 = eval_res['BMESF1PreRecMetric']['f'] | |||
pre = eval_res['BMESF1PreRecMetric']['pre'] | |||
rec = eval_res['BMESF1PreRecMetric']['rec'] | |||
# print("f1:{:.2f}, pre:{:.2f}, rec:{:.2f}".format(f1, pre, rec)) | |||
return {"F1": f1, "precision": pre, "recall": rec} | |||
@@ -357,25 +319,25 @@ class Parser(API): | |||
super(Parser, self).__init__() | |||
if model_path is None: | |||
model_path = model_urls['parser'] | |||
self.pos_tagger = POS(device=device) | |||
self.load(model_path, device) | |||
def predict(self, content): | |||
if not hasattr(self, 'pipeline'): | |||
raise ValueError("You have to load model first.") | |||
# 1. use POS to get segmentation and pos tagging results
pos_out = self.pos_tagger.predict(content) | |||
# pos_out = ['这里/NN 是/VB 分词/NN 结果/NN'.split()] | |||
# 2. build the dataset
dataset = DataSet() | |||
dataset.add_field('wp', pos_out) | |||
dataset.apply(lambda x: ['<BOS>'] + [w.split('/')[0] for w in x['wp']], new_field_name='words') | |||
dataset.apply(lambda x: ['<BOS>'] + [w.split('/')[1] for w in x['wp']], new_field_name='pos') | |||
dataset.rename_field("words", "raw_words") | |||
# 3. run the pipeline
self.pipeline(dataset) | |||
dataset.apply(lambda x: [str(arc) for arc in x['arc_pred']], new_field_name='arc_pred') | |||
@@ -383,7 +345,7 @@ class Parser(API): | |||
zip(x['arc_pred'], x['label_pred_seq'])][1:], new_field_name='output') | |||
# output like: [['2/top', '0/root', '4/nn', '2/dep']] | |||
return dataset.field_arrays['output'].content | |||
def load_test_file(self, path): | |||
def get_one(sample): | |||
sample = list(map(list, zip(*sample))) | |||
@@ -395,7 +357,7 @@ class Parser(API): | |||
return None | |||
# return word_seq, pos_seq, head_seq, head_tag_seq | |||
return sample[1], sample[3], list(map(int, sample[6])), sample[7] | |||
datalist = [] | |||
with open(path, 'r', encoding='utf-8') as f: | |||
sample = [] | |||
@@ -409,14 +371,14 @@ class Parser(API): | |||
sample.append(line.split('\t')) | |||
if len(sample) > 0: | |||
datalist.append(sample) | |||
data = [get_one(sample) for sample in datalist] | |||
data_list = list(filter(lambda x: x is not None, data)) | |||
return data_list | |||
def test(self, filepath): | |||
data = self.load_test_file(filepath) | |||
def convert(data): | |||
BOS = '<BOS>' | |||
dataset = DataSet() | |||
@@ -431,7 +393,7 @@ class Parser(API): | |||
arc_true=heads, | |||
tags=head_tags)) | |||
return dataset | |||
ds = convert(data) | |||
pp = self.pipeline | |||
for p in pp: | |||
@@ -452,23 +414,23 @@ class Parser(API): | |||
head_cor += 1 if head_pred[i] == head_gold[i] else 0 | |||
uas = head_cor / total | |||
# print('uas:{:.2f}'.format(uas)) | |||
for p in pp: | |||
if p.field_name == 'gold_words': | |||
p.field_name = 'word_list' | |||
elif p.field_name == 'gold_pos': | |||
p.field_name = 'pos_list' | |||
return {"USA": round(uas, 5)} | |||
class Analyzer: | |||
def __init__(self, device='cpu'): | |||
self.cws = CWS(device=device) | |||
self.pos = POS(device=device) | |||
self.parser = Parser(device=device) | |||
def predict(self, content, seg=False, pos=False, parser=False): | |||
if seg is False and pos is False and parser is False: | |||
seg = True | |||
@@ -482,9 +444,9 @@ class Analyzer: | |||
if parser: | |||
parser_output = self.parser.predict(content) | |||
output_dict['parser'] = parser_output | |||
return output_dict | |||
def test(self, filepath): | |||
output_dict = {} | |||
if self.cws: | |||
@@ -496,5 +458,5 @@ class Analyzer: | |||
if self.parser: | |||
parser_output = self.parser.test(filepath) | |||
output_dict['parser'] = parser_output | |||
return output_dict |
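A usage sketch for ``Analyzer``, assuming the result dict is keyed by the requested tasks as ``output_dict`` in the code suggests::

    analyzer = Analyzer(device='cpu')
    result = analyzer.predict('这是一个简单的测试。', seg=True, pos=True)
    # result is a dict keyed by the requested tasks, e.g. result['pos']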
@@ -3,7 +3,7 @@ api/example.py contains all API examples provided by fastNLP. | |||
It is used as a tutorial for API or a test script since it is difficult to test APIs in travis. | |||
""" | |||
from fastNLP.api import CWS, POS, Parser | |||
from . import CWS, POS, Parser | |||
text = ['编者按:7月12日,英国航空航天系统公司公布了该公司研制的第一款高科技隐形无人机雷电之神。', | |||
'这款飞行从外型上来看酷似电影中的太空飞行器,据英国方面介绍,可以实现洲际远程打击。', | |||
@@ -1,4 +1,4 @@ | |||
from fastNLP.api.processor import Processor | |||
from ..api.processor import Processor | |||
class Pipeline: | |||
@@ -3,10 +3,10 @@ from collections import defaultdict | |||
import torch | |||
from fastNLP.core.batch import Batch | |||
from fastNLP.core.dataset import DataSet | |||
from fastNLP.core.sampler import SequentialSampler | |||
from fastNLP.core.vocabulary import Vocabulary | |||
from ..core.batch import Batch | |||
from ..core.dataset import DataSet | |||
from ..core.sampler import SequentialSampler | |||
from ..core.vocabulary import Vocabulary | |||
class Processor(object): | |||
@@ -232,7 +232,7 @@ class SeqLenProcessor(Processor): | |||
return dataset | |||
from fastNLP.core.utils import _build_args | |||
from ..core.utils import _build_args | |||
class ModelProcessor(Processor): | |||
@@ -11,15 +11,15 @@ import torch | |||
try: | |||
from tqdm.autonotebook import tqdm | |||
except: | |||
from fastNLP.core.utils import _pseudo_tqdm as tqdm | |||
from ..core.utils import _pseudo_tqdm as tqdm | |||
from fastNLP.core.batch import Batch | |||
from fastNLP.core.callback import CallbackException | |||
from fastNLP.core.dataset import DataSet | |||
from fastNLP.core.utils import _move_dict_value_to_device | |||
from ..core.batch import Batch | |||
from ..core.callback import CallbackException | |||
from ..core.dataset import DataSet | |||
from ..core.utils import _move_dict_value_to_device | |||
import fastNLP | |||
import fastNLP.automl.enas_utils as utils | |||
from fastNLP.core.utils import _build_args | |||
from . import enas_utils as utils | |||
from ..core.utils import _build_args | |||
from torch.optim import Adam | |||
@@ -1,6 +1,20 @@ | |||
""" | |||
The core module implements fastNLP's core framework; the commonly used components can be imported directly from the fastNLP package. You may equally import from core's submodules;
for example, the Batch component can be imported in two ways::
# import directly from fastNLP
from fastNLP import Batch
# import from the batch submodule of the core module
from fastNLP.core.batch import Batch
For everyday functionality you only need to look at :doc:`fastNLP`. If you want to understand how the submodules divide the work, you can read the following documents:
""" | |||
from .batch import Batch | |||
from .dataset import DataSet | |||
from .fieldarray import FieldArray | |||
from .field import FieldArray, Padder, AutoPadder, EngChar2DPadder | |||
from .instance import Instance | |||
from .losses import LossFunc, CrossEntropyLoss, L1Loss, BCELoss, NLLLoss, LossInForward | |||
from .metrics import AccuracyMetric | |||
@@ -1,24 +1,34 @@ | |||
""" | |||
The batch module implements the Batch class required by fastNLP.
""" | |||
__all__ = ["Batch"] | |||
import numpy as np | |||
import torch | |||
import atexit | |||
from fastNLP.core.sampler import RandomSampler, Sampler | |||
from .sampler import RandomSampler, Sampler | |||
import torch.multiprocessing as mp | |||
_python_is_exit = False | |||
def _set_python_is_exit(): | |||
global _python_is_exit | |||
_python_is_exit = True | |||
atexit.register(_set_python_is_exit) | |||
class Batch(object): | |||
""" | |||
.. _Batch: | |||
Alias: :class:`fastNLP.Batch` :class:`fastNLP.core.batch.Batch`
Batch is used to take data out of a `DataSet` in a fixed order, ``batch_size`` items at a time,
forming `x` and `y`
Example:: | |||
batch = Batch(data_set, batch_size=16, sampler=SequentialSampler()) | |||
@@ -26,16 +36,19 @@ class Batch(object): | |||
for batch_x, batch_y in batch: | |||
# do stuff ... | |||
:param DataSet dataset: a `DataSet` object, the data set
:param dataset: a :class:`~fastNLP.DataSet` object, the data set
:param int batch_size: size of each batch taken out
:param Sampler sampler: the sampling strategy to use. If ``None``, RandomSampler is used.
:param sampler: the :class:`~fastNLP.Sampler` strategy to use. If ``None``, :class:`~fastNLP.RandomSampler` is used.
Default: ``None``
:param bool as_numpy: if ``True``, the output batch is a numpy.array, otherwise a torch.Tensor.
:param bool as_numpy: if ``True``, the output batch is a numpy.array, otherwise a :class:`torch.Tensor`.
Default: ``False``
:param bool prefetch: if ``True``, use multiple processes to prefetch the next batch.
Default: ``False``
""" | |||
def __init__(self, dataset, batch_size, sampler=None, as_numpy=False, prefetch=False): | |||
self.dataset = dataset | |||
self.batch_size = batch_size | |||
@@ -49,17 +62,17 @@ class Batch(object): | |||
self.cur_batch_indices = None | |||
self.prefetch = prefetch | |||
self.lengths = 0 | |||
def _fetch_one(self): | |||
def fetch_one(self): | |||
if self.curidx >= len(self.idx_list): | |||
return None | |||
else: | |||
endidx = min(self.curidx + self.batch_size, len(self.idx_list)) | |||
batch_x, batch_y = {}, {} | |||
indices = self.idx_list[self.curidx:endidx] | |||
self.cur_batch_indices = indices | |||
for field_name, field in self.dataset.get_all_fields().items(): | |||
if field.is_target or field.is_input: | |||
batch = field.get(indices) | |||
@@ -69,10 +82,10 @@ class Batch(object): | |||
batch_y[field_name] = batch | |||
if field.is_input: | |||
batch_x[field_name] = batch | |||
self.curidx = endidx | |||
return batch_x, batch_y | |||
def __iter__(self): | |||
""" | |||
Iterate over the dataset and fetch batch data. The fetch process does not block iteration
@@ -80,25 +93,28 @@ class Batch(object): | |||
""" | |||
if self.prefetch: | |||
return _run_batch_iter(self) | |||
def batch_iter(): | |||
self._init_iter() | |||
self.init_iter() | |||
while 1: | |||
res = self._fetch_one() | |||
res = self.fetch_one() | |||
if res is None: | |||
break | |||
yield res | |||
return batch_iter() | |||
def _init_iter(self): | |||
def init_iter(self): | |||
self.idx_list = self.sampler(self.dataset) | |||
self.curidx = 0 | |||
self.lengths = self.dataset.get_length() | |||
def __len__(self): | |||
return self.num_batches | |||
def get_batch_indices(self): | |||
"""取得当前batch在DataSet中所在的index下标序列 | |||
""" | |||
取得当前batch在DataSet中所在的index下标序列 | |||
:return list(int) indexes: 下标序列 | |||
""" | |||
@@ -118,16 +134,16 @@ def _to_tensor(batch, dtype): | |||
def _run_fetch(batch, q): | |||
global _python_is_exit | |||
batch._init_iter() | |||
batch.init_iter() | |||
# print('start fetch') | |||
while 1: | |||
res = batch._fetch_one() | |||
res = batch.fetch_one() | |||
# print('fetch one') | |||
while 1: | |||
try: | |||
q.put(res, timeout=3) | |||
break | |||
except Exception as e: | |||
except: | |||
if _python_is_exit: | |||
return | |||
if res is None: | |||
@@ -159,4 +175,3 @@ def _run_batch_iter(batch): | |||
fetch_p.terminate() | |||
fetch_p.join() | |||
# print('iter done') | |||
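A minimal sketch of the prefetch path above: with ``prefetch=True``, ``_run_fetch`` fills a queue from a worker process while the main process consumes batches (the toy fields are made up)::

    from fastNLP import DataSet, Batch, SequentialSampler

    ds = DataSet({"x": [[1, 2], [3, 4], [5, 6]], "y": [0, 1, 0]})
    ds.set_input("x")
    ds.set_target("y")

    for batch_x, batch_y in Batch(ds, batch_size=2, sampler=SequentialSampler(), prefetch=True):
        pass  # the next batch is being fetched by the worker process in the meantime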
@@ -1,87 +1,89 @@ | |||
""" | |||
Documentation for Callback
.. _Callback:
Callback is the class fastNLP provides to enhance Trainer_. If a Callback is passed to a Trainer_, the Trainer_ calls the Callback's
functions at the corresponding stages; the exact timing can be looked up in Trainer_.
The callback module implements fastNLP's Callback class, used to enhance the :class:`~fastNLP.Trainer` class;
for detailed documentation on the Trainer, see :doc:`the trainer module<fastNLP.core.trainer>`
""" | |||
import os | |||
import torch | |||
from fastNLP.io.model_io import ModelSaver, ModelLoader | |||
from ..io.model_io import ModelSaver, ModelLoader | |||
try: | |||
from tensorboardX import SummaryWriter | |||
except: | |||
pass | |||
class Callback(object): | |||
"""这是Callback的基类,所有的callback必须继承自这个类。 | |||
class Callback(object): | |||
""" | |||
别名::class:`fastNLP.Callback` :class:`fastNLP.core.callback.Callback` | |||
Callback是fastNLP中被设计用于增强 :class:`~fastNLP.Trainer` 的类。 | |||
如果Callback被传递给了 Trainer , 则 Trainer 会在对应的阶段调用Callback的函数, | |||
具体调用时机可以通过 :doc:`trainer 模块<fastNLP.core.trainer>` 查看。 | |||
这是Callback的基类,所有的callback必须继承自这个类(参见 :doc:`callback 模块 <fastNLP.core.callback>` ) | |||
""" | |||
def __init__(self): | |||
super(Callback, self).__init__() | |||
self._trainer = None  # reassigned inside the Trainer
@property
def trainer(self):
"""
This attribute is reachable as self.trainer; normally it is not needed.
"""
return self._trainer
@property
def step(self):
"""The step currently reached, in the range [1, self.n_steps+1)"""
return self._trainer.step
@property
def n_steps(self):
"""The total number of steps the Trainer will run"""
return self._trainer.n_steps
@property
def batch_size(self):
"""The batch_size used for train and evaluate"""
return self._trainer.batch_size
@property
def epoch(self):
"""The epoch currently running, in the range [1, self.n_epochs+1)"""
return self._trainer.epoch
@property
def n_epochs(self):
"""The total number of epochs that will run"""
return self._trainer.n_epochs
@property
def optimizer(self):
"""The Optimizer passed when the Trainer was initialized"""
return self._trainer.optimizer
@property
def model(self):
"""The model being trained by the Trainer"""
return self._trainer.model
@property
def pbar(self):
"""If you need to print inside a Callback, use self.pbar.write(str); otherwise the command-line display may be garbled."""
return self._trainer.pbar
@property
def update_every(self):
"""How many backward passes the model in the Trainer does per gradient update; passed in when the Trainer was initialized."""
return self._trainer.update_every
@property
def batch_per_epoch(self):
"""The number of batches in each epoch; this attribute can only be accessed after on_epoch_begin."""
return self._trainer.batch_per_epoch
def on_train_begin(self): | |||
""" | |||
Called before the training process begins.
@@ -89,7 +91,7 @@ class Callback(object): | |||
:return: | |||
""" | |||
pass | |||
def on_epoch_begin(self): | |||
""" | |||
Called once before each epoch begins
@@ -97,7 +99,7 @@ class Callback(object): | |||
:return: | |||
""" | |||
pass | |||
def on_batch_begin(self, batch_x, batch_y, indices): | |||
""" | |||
Called once each time a batch of data has been collected. Deleting from or adding to batch_x or batch_y here can affect the contents inside the Trainer, so at this step
@@ -110,7 +112,7 @@ class Callback(object): | |||
:return: | |||
""" | |||
pass | |||
def on_loss_begin(self, batch_y, predict_y): | |||
""" | |||
Called before the loss is computed, i.e. modifying the values of batch_y or predict_y here can affect the loss computation.
@@ -120,7 +122,7 @@ class Callback(object): | |||
:return: | |||
""" | |||
pass | |||
def on_backward_begin(self, loss): | |||
""" | |||
After the loss is obtained but before back-propagation. A check for NaN loss could be done here.
@@ -129,7 +131,7 @@ class Callback(object): | |||
:return: | |||
""" | |||
pass | |||
def on_backward_end(self): | |||
""" | |||
Backward propagation has finished, but because of the update_every setting there may not be gradients on every call. At this step the parameters have not yet been updated.
@@ -137,7 +139,7 @@ class Callback(object): | |||
:return: | |||
""" | |||
pass | |||
def on_step_end(self): | |||
""" | |||
At this point the model parameters have been updated from the gradients, though because of update_every they may not be updated on every call.
@@ -145,14 +147,14 @@ class Callback(object): | |||
:return: | |||
""" | |||
pass | |||
def on_batch_end(self): | |||
""" | |||
This step immediately follows on_step_end; it exists only for symmetry.
""" | |||
pass | |||
def on_valid_begin(self): | |||
""" | |||
If validation is configured in the Trainer, this function is called before validation happens
@@ -160,7 +162,7 @@ class Callback(object): | |||
:return: | |||
""" | |||
pass | |||
def on_valid_end(self, eval_result, metric_key, optimizer, is_better_eval): | |||
""" | |||
Called after every evaluation on the validation set.
@@ -173,19 +175,19 @@ class Callback(object): | |||
:return: | |||
""" | |||
pass | |||
def on_epoch_end(self): | |||
""" | |||
Called at the end of each epoch
""" | |||
pass | |||
def on_train_end(self): | |||
""" | |||
Called when training ends
""" | |||
pass | |||
def on_exception(self, exception): | |||
""" | |||
Triggered when an exception occurs during the training process
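Taken together, the hooks and properties above are enough for a small custom callback; a minimal sketch (``LossLogger`` is a hypothetical name, not part of fastNLP)::

    from fastNLP import Callback

    class LossLogger(Callback):
        def on_backward_begin(self, loss):
            if self.step % 100 == 0:
                # use self.pbar.write instead of print, as the pbar property advises
                self.pbar.write("step {}: loss {:.4f}".format(self.step, loss.item()))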
@@ -196,32 +198,31 @@ class Callback(object): | |||
def _transfer(func): | |||
"""装饰器,将对CallbackManager的调用转发到各个Callback子类. | |||
:param func: | |||
:return: | |||
""" | |||
def wrapper(manager, *arg): | |||
returns = [] | |||
for callback in manager.callbacks: | |||
returns.append(getattr(callback, func.__name__)(*arg)) | |||
return returns | |||
return wrapper | |||
class CallbackManager(Callback): | |||
"""内部使用的Callback管理类 | |||
""" | |||
def __init__(self, env, callbacks=None): | |||
""" | |||
Callback manager class for internal use
:param dict env: The key is the name of the Trainer attribute(str). The value is the attribute itself. | |||
:param List[Callback] callbacks: | |||
""" | |||
super(CallbackManager, self).__init__() | |||
# set attribute of trainer environment | |||
self.callbacks = [] | |||
if callbacks is not None: | |||
if isinstance(callbacks, list): | |||
@@ -232,78 +233,82 @@ class CallbackManager(Callback): | |||
raise TypeError(f"Expect sub-classes of Callback. Got {type(obj)}") | |||
else: | |||
raise TypeError(f"Expect callbacks in CallbackManager(callbacks) to be list. Got {type(callbacks)}.") | |||
for env_name, env_val in env.items(): | |||
for callback in self.callbacks: | |||
setattr(callback, '_'+env_name, env_val) # Callback.trainer | |||
setattr(callback, '_' + env_name, env_val) # Callback.trainer | |||
@_transfer | |||
def on_train_begin(self): | |||
pass | |||
@_transfer | |||
def on_epoch_begin(self): | |||
pass | |||
@_transfer | |||
def on_batch_begin(self, batch_x, batch_y, indices): | |||
pass | |||
@_transfer | |||
def on_loss_begin(self, batch_y, predict_y): | |||
pass | |||
@_transfer | |||
def on_backward_begin(self, loss): | |||
pass | |||
@_transfer | |||
def on_backward_end(self): | |||
pass | |||
@_transfer | |||
def on_step_end(self): | |||
pass | |||
@_transfer | |||
def on_batch_end(self): | |||
pass | |||
@_transfer | |||
def on_valid_begin(self): | |||
pass | |||
@_transfer | |||
def on_valid_end(self, eval_result, metric_key, optimizer, is_better_eval): | |||
pass | |||
@_transfer | |||
def on_epoch_end(self): | |||
pass | |||
@_transfer | |||
def on_train_end(self): | |||
pass | |||
@_transfer | |||
def on_exception(self, exception): | |||
pass | |||
class GradientClipCallback(Callback): | |||
"""每次backward前,将parameter的gradient clip到某个范围。 | |||
:param None,torch.Tensor,List[torch.Tensor] parameters: 一般通过model.parameters()获得。如果为None则默认对Trainer | |||
的model中所有参数进行clip | |||
:param float clip_value: 将gradient 限制到[-clip_value, clip_value]。clip_value应该为正数 | |||
:param str clip_type: 支持'norm', 'value'两种:: | |||
1 'norm', 将gradient的norm rescale到[-clip_value, clip_value] | |||
2 'value', 将gradient限制在[-clip_value, clip_value], 小于-clip_value的gradient被赋值为-clip_value; | |||
大于clip_value的gradient被赋值为clip_value. | |||
""" | |||
def __init__(self, parameters=None, clip_value=1, clip_type='norm'): | |||
"""每次backward前,将parameter的gradient clip到某个范围。 | |||
:param None,torch.Tensor,List[torch.Tensor] parameters: 一般通过model.parameters()获得。如果为None则默认对Trainer | |||
的model中所有参数进行clip | |||
:param float clip_value: 将gradient 限制到[-clip_value, clip_value]。clip_value应该为正数 | |||
:param str clip_type: 支持'norm', 'value'两种。 | |||
1. 'norm', 将gradient的norm rescale到[-clip_value, clip_value] | |||
2. 'value', 将gradient限制在[-clip_value, clip_value], 小于-clip_value的gradient被赋值为-clip_value; 大于 | |||
clip_value的gradient被赋值为clip_value. | |||
""" | |||
super().__init__() | |||
from torch import nn | |||
if clip_type == 'norm': | |||
self.clip_fun = nn.utils.clip_grad_norm_ | |||
@@ -313,7 +318,7 @@ class GradientClipCallback(Callback): | |||
raise ValueError("Only supports `norm` or `value` right now.") | |||
self.parameters = parameters | |||
self.clip_value = clip_value | |||
def on_backward_end(self): | |||
if self.parameters is None: | |||
self.clip_fun(self.model.parameters(), self.clip_value) | |||
@@ -321,31 +326,17 @@ class GradientClipCallback(Callback): | |||
self.clip_fun(self.parameters, self.clip_value) | |||
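A usage sketch, assuming the Trainer accepts a ``callbacks`` list as ``CallbackManager`` suggests (``model`` and ``train_data`` are placeholders)::

    from fastNLP import Trainer
    from fastNLP.core.callback import GradientClipCallback

    clip = GradientClipCallback(clip_value=5, clip_type='value')
    trainer = Trainer(train_data=train_data, model=model, callbacks=[clip])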
class CallbackException(BaseException): | |||
def __init__(self, msg): | |||
""" | |||
When you need to break out of training via a callback, raise a CallbackException and catch it in on_exception.
:param str msg: the message of the Exception.
""" | |||
super(CallbackException, self).__init__(msg) | |||
class EarlyStopError(CallbackException): | |||
def __init__(self, msg): | |||
"""用于EarlyStop时从Trainer训练循环中跳出。""" | |||
super(EarlyStopError, self).__init__(msg) | |||
class EarlyStopCallback(Callback): | |||
def __init__(self, patience): | |||
""" | |||
""" | |||
:param int patience: stop training after this many epochs without improvement
"""
:param int patience: stop training after this many epochs without improvement
""" | |||
def __init__(self, patience): | |||
super(EarlyStopCallback, self).__init__() | |||
self.patience = patience | |||
self.wait = 0 | |||
def on_valid_end(self, eval_result, metric_key, optimizer, is_better_eval): | |||
if not is_better_eval: | |||
# current result is getting worse | |||
@@ -355,7 +346,7 @@ class EarlyStopCallback(Callback): | |||
self.wait += 1 | |||
else: | |||
self.wait = 0 | |||
def on_exception(self, exception): | |||
if isinstance(exception, EarlyStopError): | |||
print("Early Stopping triggered in epoch {}!".format(self.epoch)) | |||
@@ -364,39 +355,41 @@ class EarlyStopCallback(Callback): | |||
class LRScheduler(Callback): | |||
def __init__(self, lr_scheduler): | |||
"""对PyTorch LR Scheduler的包装以使得其可以被Trainer所使用 | |||
Example:: | |||
"""对PyTorch LR Scheduler的包装以使得其可以被Trainer所使用 | |||
from fastNLP import LRScheduler | |||
Example:: | |||
from fastNLP import LRScheduler | |||
:param torch.optim.lr_scheduler._LRScheduler lr_scheduler: PyTorch的lr_scheduler | |||
""" | |||
:param torch.optim.lr_scheduler._LRScheduler lr_scheduler: PyTorch的lr_scheduler | |||
""" | |||
def __init__(self, lr_scheduler): | |||
super(LRScheduler, self).__init__() | |||
import torch.optim | |||
if isinstance(lr_scheduler, torch.optim.lr_scheduler._LRScheduler): | |||
self.scheduler = lr_scheduler | |||
else: | |||
raise ValueError(f"Expect torch.optim.lr_scheduler for LRScheduler. Got {type(lr_scheduler)}.") | |||
def on_epoch_begin(self): | |||
self.scheduler.step() | |||
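A sketch of wrapping a standard PyTorch scheduler (``model`` is a placeholder; ``StepLR`` is one valid ``_LRScheduler``)::

    import torch
    from fastNLP.core.callback import LRScheduler

    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    callback = LRScheduler(torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5))
    # on_epoch_begin above then calls scheduler.step() once per epoch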
class ControlC(Callback): | |||
def __init__(self, quit_all): | |||
""" | |||
""" | |||
:param bool quit_all: if True, exit the program directly when control+C is detected; otherwise only exit the Trainer
"""
:param bool quit_all: if True, exit the program directly when control+C is detected; otherwise only exit the Trainer
""" | |||
def __init__(self, quit_all): | |||
super(ControlC, self).__init__() | |||
if type(quit_all) != bool: | |||
raise ValueError("In KeyBoardInterrupt, quit_all arguemnt must be a bool.") | |||
self.quit_all = quit_all | |||
def on_exception(self, exception): | |||
if isinstance(exception, KeyboardInterrupt): | |||
if self.quit_all is True: | |||
@@ -412,7 +405,7 @@ class SmoothValue(object): | |||
def __init__(self, beta: float): | |||
self.beta, self.n, self.mov_avg = beta, 0, 0 | |||
self.smooth = None | |||
def add_value(self, val: float) -> None: | |||
"Add `val` to calculate updated smoothed value." | |||
self.n += 1 | |||
@@ -421,13 +414,15 @@ class SmoothValue(object): | |||
class LRFinder(Callback): | |||
def __init__(self, start_lr=1e-6, end_lr=10): | |||
"""用第一个 epoch 找最佳的学习率,从第二个epoch开始应用它 | |||
""" | |||
用第一个 epoch 找最佳的学习率,从第二个epoch开始应用它 | |||
:param int n_batch: 一个epoch内的iteration数 | |||
:param float start_lr: 学习率下界 | |||
:param float end_lr: 学习率上界 | |||
""" | |||
:param float start_lr: 学习率下界 | |||
:param float end_lr: 学习率上界 | |||
""" | |||
def __init__(self, start_lr=1e-6, end_lr=10): | |||
super(LRFinder, self).__init__() | |||
self.start_lr, self.end_lr = start_lr, end_lr | |||
self.num_it = self.batch_per_epoch | |||
@@ -438,19 +433,19 @@ class LRFinder(Callback): | |||
self.smooth_value = SmoothValue(0.8) | |||
self.opt = None | |||
scale = (self.end_lr - self.start_lr) / self.num_it | |||
self.lr_gen = (self.start_lr + scale * (step + 1) for step in range(self.num_it)) | |||
self.find = None | |||
self.loader = ModelLoader() | |||
def on_epoch_begin(self): | |||
if self.epoch == 1: # first epoch | |||
if self.epoch == 1: # first epoch | |||
self.opt = self.trainer.optimizer # pytorch optimizer | |||
self.opt.param_groups[0]["lr"] = self.start_lr | |||
# save model | |||
ModelSaver("tmp").save_pytorch(self.trainer.model, param_only=True) | |||
self.find = True | |||
def on_backward_begin(self, loss): | |||
if self.find: | |||
if torch.isnan(loss) or self.stop is True: | |||
@@ -462,7 +457,7 @@ class LRFinder(Callback): | |||
if self.best_loss == 0. or self.smooth_value.smooth < self.best_loss: | |||
self.best_loss = self.smooth_value.smooth | |||
self.best_lr = self.opt.param_groups[0]["lr"] | |||
def on_batch_end(self, *args): | |||
if self.find: | |||
lr = next(self.lr_gen, None) | |||
@@ -471,9 +466,9 @@ class LRFinder(Callback): | |||
return | |||
self.opt.param_groups[0]["lr"] = lr | |||
# self.loader.load_pytorch(self.trainer.model, "tmp") | |||
def on_epoch_end(self): | |||
if self.epoch == 1: # first epoch | |||
if self.epoch == 1: # first epoch | |||
self.opt.param_groups[0]["lr"] = self.best_lr | |||
self.find = False | |||
# reset model | |||
@@ -483,12 +478,12 @@ class LRFinder(Callback): | |||
class TensorboardCallback(Callback): | |||
""" | |||
Accepts one or more of the following strings as arguments:
- "model"
- "loss"
- "metric"
Accepts one or more of the following strings as arguments:
- "model" | |||
- "loss" | |||
- "metric" | |||
""" | |||
def __init__(self, *options): | |||
super(TensorboardCallback, self).__init__() | |||
args = {"model", "loss", "metric"} | |||
@@ -498,7 +493,7 @@ class TensorboardCallback(Callback): | |||
self.options = options | |||
self._summary_writer = None | |||
self.graph_added = False | |||
def on_train_begin(self): | |||
save_dir = self.trainer.save_path | |||
if save_dir is None: | |||
@@ -506,7 +501,7 @@ class TensorboardCallback(Callback): | |||
else: | |||
path = os.path.join(save_dir, 'tensorboard_logs_{}'.format(self.trainer.start_time)) | |||
self._summary_writer = SummaryWriter(path) | |||
def on_batch_begin(self, batch_x, batch_y, indices): | |||
if "model" in self.options and self.graph_added is False: | |||
# tensorboardX has a serious bug here; drawing the model graph is not possible for now
@@ -516,11 +511,11 @@ class TensorboardCallback(Callback): | |||
# args = args[0] if len(args) == 1 else args | |||
# self._summary_writer.add_graph(self.trainer.model, torch.zeros(32, 2)) | |||
self.graph_added = True | |||
def on_backward_begin(self, loss): | |||
if "loss" in self.options: | |||
self._summary_writer.add_scalar("loss", loss.item(), global_step=self.trainer.step) | |||
if "model" in self.options: | |||
for name, param in self.trainer.model.named_parameters(): | |||
if param.requires_grad: | |||
@@ -528,21 +523,40 @@ class TensorboardCallback(Callback): | |||
# self._summary_writer.add_scalar(name + "_std", param.std(), global_step=self.trainer.step) | |||
self._summary_writer.add_scalar(name + "_grad_mean", param.grad.mean(), | |||
global_step=self.trainer.step) | |||
def on_valid_end(self, eval_result, metric_key, optimizer, is_better_eval): | |||
if "metric" in self.options: | |||
for name, metric in eval_result.items(): | |||
for metric_key, metric_val in metric.items(): | |||
self._summary_writer.add_scalar("valid_{}_{}".format(name, metric_key), metric_val, | |||
global_step=self.trainer.step) | |||
def on_train_end(self): | |||
self._summary_writer.close() | |||
del self._summary_writer | |||
def on_exception(self, exception): | |||
if hasattr(self, "_summary_writer"): | |||
self._summary_writer.close() | |||
del self._summary_writer | |||
class CallbackException(BaseException): | |||
""" | |||
When you need to break out of training via a callback, raise a CallbackException and catch it in on_exception.
:param str msg: the message of the Exception.
""" | |||
def __init__(self, msg): | |||
super(CallbackException, self).__init__(msg) | |||
class EarlyStopError(CallbackException): | |||
""" | |||
Used to break out of the Trainer's training loop on EarlyStop.
""" | |||
def __init__(self, msg): | |||
super(EarlyStopError, self).__init__(msg) |
@@ -0,0 +1,46 @@ | |||
class Const: | |||
"""fastNLP中field命名常量。 | |||
具体列表:: | |||
INPUT 模型的序列输入 words(复数words1, words2) | |||
CHAR_INPUT 模型character输入 chars(复数chars1, chars2) | |||
INPUT_LEN 序列长度 seq_len(复数seq_len1,seq_len2) | |||
OUTPUT 模型输出 pred(复数pred1, pred2) | |||
TARGET 真实目标 target(复数target1,target2) | |||
""" | |||
INPUT = 'words' | |||
CHAR_INPUT = 'chars' | |||
INPUT_LEN = 'seq_len' | |||
OUTPUT = 'pred' | |||
TARGET = 'target' | |||
@staticmethod | |||
def INPUTS(i): | |||
"""得到第 i 个 ``INPUT`` 的命名""" | |||
i = int(i) + 1 | |||
return Const.INPUT + str(i) | |||
@staticmethod | |||
def CHAR_INPUTS(i): | |||
"""得到第 i 个 ``CHAR_INPUT`` 的命名""" | |||
i = int(i) + 1 | |||
return Const.CHAR_INPUT + str(i) | |||
@staticmethod | |||
def INPUT_LENS(i): | |||
"""得到第 i 个 ``INPUT_LEN`` 的命名""" | |||
i = int(i) + 1 | |||
return Const.INPUT_LEN + str(i) | |||
@staticmethod | |||
def OUTPUTS(i): | |||
"""得到第 i 个 ``OUTPUT`` 的命名""" | |||
i = int(i) + 1 | |||
return Const.OUTPUT + str(i) | |||
@staticmethod | |||
def TARGETS(i): | |||
"""得到第 i 个 ``TARGET`` 的命名""" | |||
i = int(i) + 1 | |||
return Const.TARGET + str(i) |
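A quick sketch of how these constants expand (the import path ``fastNLP.core.const`` is an assumption based on this diff)::

    from fastNLP.core.const import Const

    Const.INPUT        # 'words'
    Const.INPUTS(0)    # 'words1' -- i is incremented first, so numbering starts at 1
    Const.TARGETS(1)   # 'target2'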
@@ -1,7 +1,6 @@ | |||
""" | |||
FieldArray is how one column of a DataSet_ is stored; for the underlying principle see DataSet_
.. _FieldArray:
The field module implements FieldArray and several Padders. FieldArray is how one column of a :class:`~fastNLP.DataSet` is stored;
for the underlying principle see :doc:`fastNLP.core.dataset`
""" | |||
@@ -11,19 +10,21 @@ from copy import deepcopy | |||
class FieldArray(object): | |||
""" | |||
Alias: :class:`fastNLP.FieldArray` :class:`fastNLP.core.field.FieldArray`
FieldArray is the type used to store one field of a :class:`~fastNLP.DataSet`.
:param str name: name of the FieldArray
:param list,numpy.ndarray content: the elements of the list may be list, int, float,
:param bool is_target: whether this field is a target field.
:param bool is_input: whether this field is an input field.
:param padder: of type :class:`~fastNLP.Padder`. The padder object assigned to a fieldarray is deepcopied; changing padder parameters must go through
fieldarray.set_pad_val(). Defaults to None, i.e. :class:`~fastNLP.AutoPadder` is used.
:param bool ignore_type: whether to ignore the type of this field. Generally, if the field does not need to be converted to torch.FloatTensor or torch.LongTensor,
this can be set to True. For the exact meaning see :class:`~fastNLP.DataSet` .
"""
def __init__(self, name, content, is_target=None, is_input=None, padder=None, ignore_type=False):
"""FieldArray is the entity used to store one field of a DataSet_.
:param str name: name of the FieldArray
:param list,numpy.ndarray content: the elements of the list may be list, int, float,
:param bool is_target: whether this field is a target field.
:param bool is_input: whether this field is an input field.
:param Padder padder: of PadderBase type. The padder object assigned to a fieldarray is deepcopied; changing padder parameters must go through
fieldarray.set_pad_val(). Defaults to None, i.e. AutoPadder_ is used.
:param bool ignore_type: whether to ignore the type of this field. Generally, if the field does not need to be converted to torch.FloatTensor or torch.LongTensor, this
can be set to True. For the exact meaning see DataSet_.
""" | |||
self.name = name | |||
if isinstance(content, list): | |||
# 如果DataSet使用dict初始化, content 可能是二维list/二维array/三维list | |||
@@ -87,14 +88,15 @@ class FieldArray(object): | |||
@is_target.setter | |||
def is_target(self, value): | |||
""" | |||
Called when field_array.is_target = True / False is assigned
Called when field_array.is_target = True / False is assigned
""" | |||
if value is True: | |||
self._set_dtype() | |||
self._is_target = value | |||
def _type_detection(self, content): | |||
"""当该field被设置为is_input或者is_target时被调用 | |||
""" | |||
当该field被设置为is_input或者is_target时被调用 | |||
""" | |||
if len(content) == 0: | |||
@@ -238,11 +240,12 @@ class FieldArray(object): | |||
self.content[idx] = val | |||
def get(self, indices, pad=True): | |||
"""根据给定的indices返回内容 | |||
""" | |||
根据给定的indices返回内容 | |||
:param int,list(int) indices:, 获取indices对应的内容。 | |||
:param bool pad: , 是否对返回的结果进行padding。仅对indices为List[int]时有效 | |||
:return: (single, List) | |||
:param int,List[int] indices: 获取indices对应的内容。 | |||
:param bool pad: 是否对返回的结果进行padding。仅对indices为List[int]时有效 | |||
:return: 根据给定的indices返回的内容,可能是单个值或List | |||
""" | |||
if isinstance(indices, int): | |||
return self.content[indices] | |||
@@ -259,8 +262,7 @@ class FieldArray(object): | |||
""" | |||
Set the padder; when this field is padded, this padder is used. If None, no padding is performed.
:param None,Padder padder:. Set to None to remove the padder.
:return:
:param padder: of type :class:`~fastNLP.Padder`; set to None to remove the padder.
""" | |||
if padder is not None: | |||
assert isinstance(padder, Padder), "padder must be of type Padder." | |||
@@ -269,10 +271,10 @@ class FieldArray(object): | |||
self.padder = None | |||
def set_pad_val(self, pad_val): | |||
"""修改padder的pad_val. | |||
""" | |||
修改padder的pad_val. | |||
:param int pad_val: 该field的pad值设置为该值。 | |||
:return: | |||
""" | |||
if self.padder is not None: | |||
self.padder.set_pad_val(pad_val) | |||
@@ -280,7 +282,8 @@ class FieldArray(object): | |||
def __len__(self): | |||
"""Returns the size of FieldArray. | |||
""" | |||
Returns the size of FieldArray. | |||
:return int length: | |||
""" | |||
@@ -288,10 +291,11 @@ class FieldArray(object): | |||
def to(self, other): | |||
""" | |||
Copy other's attributes to this FieldArray (other must be of FieldArray type). The attributes include is_input, is_target, padder, ignore_type
Copy other's attributes to this FieldArray (other must be of FieldArray type).
The attributes include is_input, is_target, padder, ignore_type
:param FieldArray other: the field to copy attributes from
:return: FieldArray
:param other: :class:`~fastNLP.FieldArray` the field to copy attributes from
:return: :class:`~fastNLP.FieldArray` | |||
""" | |||
assert isinstance(other, FieldArray), "Only support FieldArray type, not {}.".format(type(other)) | |||
@@ -312,10 +316,20 @@ def _is_iterable(content): | |||
class Padder: | |||
""" | |||
.. _Padder: | |||
Alias: :class:`fastNLP.Padder` :class:`fastNLP.core.field.Padder`
All padders must inherit from this class and override the __call__() method.
Used to perform padding on a batch. The elements passed in are modified in place, i.e. modifying an element directly may change the data; a deepcopy before in-place modification is recommended.
All padders must inherit from this class and override the __call__ method.
Used to perform padding on a batch. The elements passed in are modified in place, i.e. modifying an element directly may change the data; a deepcopy before in-place modification is recommended.
.. py:function:: __call__(self, contents, field_name, field_ele_dtype):
The content passed in is a List. Suppose we have the following DataSet.
:param list(Any) contents: the elements passed in are modified in place, i.e. modifying an element directly may change the data; a deepcopy before
in-place modification is recommended.
:param str field_name: name of the field.
:param np.int64,np.float64,np.str,None field_ele_dtype: the type of the field's inner elements. If the field's ignore_type is True, this value is None.
:return: np.array([padded_element]) | |||
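A minimal sketch of a custom padder (illustrative only; assumes 1-D integer contents and that the base class stores pad_val as self.pad_val)::

    import numpy as np

    class FixedLenPadder(Padder):
        # pad or truncate every instance to a fixed length -- a hypothetical example, not part of fastNLP
        def __init__(self, length, pad_val=0):
            super().__init__(pad_val=pad_val)
            self.length = length

        def __call__(self, contents, field_name, field_ele_dtype):
            arr = np.full((len(contents), self.length), self.pad_val, dtype=field_ele_dtype)
            for i, c in enumerate(contents):
                arr[i, :min(len(c), self.length)] = c[:self.length]
            return arr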
""" | |||
def __init__(self, pad_val=0, **kwargs): | |||
@@ -368,7 +382,7 @@ class Padder: | |||
class AutoPadder(Padder): | |||
""" | |||
Alias: :class:`fastNLP.AutoPadder` :class:`fastNLP.core.field.AutoPadder`

Automatically decides from the contents whether padding is needed.
@@ -420,7 +434,7 @@ class AutoPadder(Padder): | |||
class EngChar2DPadder(Padder): | |||
""" | |||
Alias: :class:`fastNLP.EngChar2DPadder` :class:`fastNLP.core.field.EngChar2DPadder`

Performs character-level 2D padding for English. The field content should look like [['T', 'h', 'i', 's'], ['a'], ['d', 'e', 'm', 'o']],
but this padder can only handle the case where the characters have been converted to int indices.
@@ -1,47 +1,50 @@ | |||
""" | |||
The instance module implements the Instance class, which corresponds to one sample in fastNLP.
A sample can be thought of as an object of type Instance.
For an easy-to-follow example, see the table in :doc:`fastNLP.core.dataset`.
""" | |||
__all__ = ["Instance"] | |||
class Instance(object): | |||
""" | |||
Alias: :class:`fastNLP.Instance` :class:`fastNLP.core.instance.Instance`

Instance is the class that corresponds to one sample in fastNLP; every sample is an Instance object in fastNLP.
Instance is usually used together with :class:`~fastNLP.DataSet` and is initialized as shown in the Example below.

Example::

    >>> import numpy as np
    >>> from fastNLP import Instance
    >>> ins = Instance(field_1=[1, 1, 1], field_2=[2, 2, 2])
    >>> ins["field_1"]
    [1, 1, 1]
    >>> ins.add_field("field_3", [3, 3, 3])
    >>> ins = Instance(**{'x1': 1, 'x2': np.zeros((3, 4))})
""" | |||
def __init__(self, **fields): | |||
"""Instance的初始化如下面的Example所示 | |||
Example:: | |||
ins = Instance(field_1=[1, 1, 1], field_2=[2, 2, 2]) | |||
ins["field_1"] | |||
>>[1, 1, 1] | |||
ins.add_field("field_3", [3, 3, 3]) | |||
ins = Instance(**{'x1': 1, 'x2':np.zeros((3, 4))}) | |||
""" | |||
self.fields = fields | |||
def add_field(self, field_name, field): | |||
"""向Instance中增加一个field | |||
""" | |||
向Instance中增加一个field | |||
:param str field_name: 新增field的名称 | |||
:param Any field: 新增field的内容 | |||
""" | |||
self.fields[field_name] = field | |||
def __getitem__(self, name): | |||
if name in self.fields: | |||
return self.fields[name] | |||
else: | |||
raise KeyError("{} not found".format(name)) | |||
def __setitem__(self, name, field): | |||
return self.add_field(name, field) | |||
def __repr__(self): | |||
s = '\'' | |||
return "{" + ",\n".join( | |||
@@ -1,36 +1,34 @@ | |||
""" | |||
The losses module defines the various loss functions used in fastNLP, generally passed as a parameter to :class:`~fastNLP.Trainer`.
""" | |||
__all__ = ["LossBase", "L1Loss", "LossFunc", "LossInForward", "BCELoss", "CrossEntropyLoss", "NLLLoss"] | |||
import inspect | |||
from collections import defaultdict | |||
import torch | |||
import torch.nn.functional as F | |||
from .utils import _CheckError | |||
from .utils import _CheckRes | |||
from .utils import _build_args | |||
from .utils import _check_arg_dict_list | |||
from .utils import _check_function_or_method | |||
from .utils import _get_func_signature | |||
class LossBase(object): | |||
"""所有loss的基类. | |||
""" | |||
所有loss的基类。如果想了解其中的原理,请查看源码。 | |||
""" | |||
def __init__(self): | |||
self.param_map = {} | |||
self._checked = False | |||
def get_loss(self, *args, **kwargs): | |||
raise NotImplementedError | |||
def _init_param_map(self, key_map=None, **kwargs): | |||
"""检查key_map和其他参数map,并将这些映射关系添加到self.param_map | |||
@@ -63,7 +61,7 @@ class LossBase(object): | |||
for value, key_set in value_counter.items(): | |||
if len(key_set) > 1: | |||
raise ValueError(f"Several parameters:{key_set} are provided with one output {value}.") | |||
# check consistency between signature and param_map
func_spect = inspect.getfullargspec(self.get_loss) | |||
func_args = [arg for arg in func_spect.args if arg != 'self'] | |||
@@ -72,12 +70,12 @@ class LossBase(object): | |||
raise NameError( | |||
f"Parameter `{func_param}` is not in {_get_func_signature(self.get_loss)}. Please check the " | |||
f"initialization parameters, or change its signature.") | |||
# evaluate should not have varargs. | |||
# if func_spect.varargs: | |||
# raise NameError(f"Delete `*{func_spect.varargs}` in {get_func_signature(self.get_loss)}(Do not use " | |||
# f"positional argument.).") | |||
def _fast_param_map(self, pred_dict, target_dict): | |||
"""Only used as inner function. When the pred_dict, target is unequivocal. Don't need users to pass key_map. | |||
such as pred_dict has one element, target_dict has one element | |||
@@ -92,7 +90,7 @@ class LossBase(object): | |||
fast_param['target'] = list(target_dict.values())[0] | |||
return fast_param | |||
return fast_param | |||
def __call__(self, pred_dict, target_dict, check=False): | |||
""" | |||
:param dict pred_dict: the dict returned by the model's forward function
@@ -104,7 +102,7 @@ class LossBase(object): | |||
if fast_param: | |||
loss = self.get_loss(**fast_param) | |||
return loss | |||
if not self._checked: | |||
# 1. check consistency between signature and param_map
func_spect = inspect.getfullargspec(self.get_loss) | |||
@@ -112,14 +110,14 @@ class LossBase(object): | |||
for func_arg, input_arg in self.param_map.items(): | |||
if func_arg not in func_args: | |||
raise NameError(f"`{func_arg}` not in {_get_func_signature(self.get_loss)}.") | |||
# 2. only part of the param_map is passed; map the remaining args to themselves
for arg in func_args: | |||
if arg not in self.param_map: | |||
self.param_map[arg] = arg # This param does not need mapping. | |||
self._evaluate_args = func_args | |||
self._reverse_param_map = {input_arg: func_arg for func_arg, input_arg in self.param_map.items()} | |||
# need to wrap inputs in dict. | |||
mapped_pred_dict = {} | |||
mapped_target_dict = {} | |||
@@ -139,7 +137,7 @@ class LossBase(object): | |||
not_duplicate_flag += 1 | |||
if not_duplicate_flag == 3: | |||
duplicated.append(input_arg) | |||
# missing | |||
if not self._checked: | |||
check_res = _check_arg_dict_list(self.get_loss, [mapped_pred_dict, mapped_target_dict]) | |||
@@ -149,47 +147,50 @@ class LossBase(object): | |||
for idx, func_arg in enumerate(missing): | |||
# Don't delete the backticks in this message, and don't add new ones
replaced_missing[idx] = f"{self.param_map[func_arg]}" + f"(assign to `{func_arg}` " \ | |||
f"in `{self.__class__.__name__}`)" | |||
f"in `{self.__class__.__name__}`)" | |||
check_res = _CheckRes(missing=replaced_missing, | |||
unused=check_res.unused, | |||
duplicated=duplicated, | |||
required=check_res.required, | |||
all_needed=check_res.all_needed, | |||
varargs=check_res.varargs) | |||
if check_res.missing or check_res.duplicated: | |||
raise _CheckError(check_res=check_res, | |||
func_signature=_get_func_signature(self.get_loss)) | |||
refined_args = _build_args(self.get_loss, **mapped_pred_dict, **mapped_target_dict) | |||
loss = self.get_loss(**refined_args) | |||
self._checked = True | |||
return loss | |||
class LossFunc(LossBase): | |||
"""提供给用户使用自定义损失函数的类 | |||
""" | |||
def __init__(self, func, key_map=None, **kwargs): | |||
""" | |||
别名::class:`fastNLP.LossFunc` :class:`fastNLP.core.losses.LossFunc` | |||
:param func: 用户自行定义的损失函数,应当为一个函数或者callable(func)为True的ojbect | |||
:param dict key_map: 参数映射表。键为Model/DataSet参数名,值为损失函数参数名。 | |||
fastNLP的trainer将在训练时从模型返回值或者训练数据DataSet的target=True的field中 | |||
找到相对应的参数名为value的参数,并传入func中作为参数名为key的参数 | |||
:param kwargs: 除了参数映射表以外可以用key word args的方式设置参数映射关系 | |||
提供给用户使用自定义损失函数的类 | |||
Example:: | |||
:param func: 用户自行定义的损失函数,应当为一个函数或者callable(func)为True的ojbect | |||
:param dict key_map: 参数映射表。键为Model/DataSet参数名,值为损失函数参数名。 | |||
fastNLP的trainer将在训练时从模型返回值或者训练数据DataSet的target=True的field中 | |||
找到相对应的参数名为value的参数,并传入func中作为参数名为key的参数 | |||
:param kwargs: 除了参数映射表以外可以用key word args的方式设置参数映射关系 | |||
>>> func = torch.nn.CrossEntropyLoss() | |||
>>> loss_func = LossFunc(func, input="pred", target="label") | |||
>>> # 这表示构建了一个损失函数类,由func计算损失函数,其中将从模型返回值或者DataSet的target=True的field | |||
>>> # 当中找到一个参数名为`pred`的参数传入func一个参数名为`input`的参数;找到一个参数名为`label`的参数 | |||
>>> # 传入func作为一个名为`target`的参数 | |||
Example:: | |||
""" | |||
>>> func = torch.nn.CrossEntropyLoss() | |||
>>> loss_func = LossFunc(func, input="pred", target="label") | |||
# 这表示构建了一个损失函数类,由func计算损失函数,其中将从模型返回值或者DataSet的target=True的field | |||
# 当中找到一个参数名为`pred`的参数传入func一个参数名为`input`的参数;找到一个参数名为`label`的参数 | |||
# 传入func作为一个名为`target`的参数 | |||
""" | |||
def __init__(self, func, key_map=None, **kwargs): | |||
super(LossFunc, self).__init__() | |||
_check_function_or_method(func) | |||
if key_map is not None: | |||
@@ -199,94 +200,108 @@ class LossFunc(LossBase): | |||
if len(kwargs) > 0: | |||
for key, val in kwargs.items(): | |||
self.param_map.update({key: val}) | |||
self.get_loss = func | |||
class CrossEntropyLoss(LossBase): | |||
""" | |||
.. _CrossEntropyLoss: | |||
别名::class:`fastNLP.CrossEntropyLoss` :class:`fastNLP.core.losses.CrossEntropyLoss` | |||
交叉熵损失函数""" | |||
def __init__(self, pred=None, target=None, padding_idx=-100): | |||
""" | |||
:param pred: 参数映射表中`pred`的映射关系,None表示映射关系为`pred`->`pred` | |||
:param target: 参数映射表中`target`的映射关系,None表示映射关系为`target`->`target` | |||
:param padding_idx: padding的index,在计算loss时将忽略target中标号为padding_idx的内容 | |||
交叉熵损失函数 | |||
:param pred: 参数映射表中 `pred` 的映射关系,None表示映射关系为 `pred` -> `pred` | |||
:param target: 参数映射表中 `target` 的映射关系,None表示映射关系为 `target` -> `target` | |||
:param padding_idx: padding的index,在计算loss时将忽略target中标号为padding_idx的内容 | |||
Example:: | |||
Example:: | |||
>>> loss = CrossEntropyLoss(pred='pred', target='label', padding_idx=0) | |||
""" | |||
>>> loss = CrossEntropyLoss(pred='pred', target='label', padding_idx=0) | |||
""" | |||
def __init__(self, pred=None, target=None, padding_idx=-100): | |||
# TODO some checks are needed: when pred is (16, 10, 4), F.cross_entropy should in principle take a target of
# TODO shape (16, 10), but it actually requires (16, 4)
super(CrossEntropyLoss, self).__init__() | |||
self._init_param_map(pred=pred, target=target) | |||
self.padding_idx = padding_idx | |||
def get_loss(self, pred, target): | |||
return F.cross_entropy(input=pred, target=target, | |||
ignore_index=self.padding_idx) | |||
class L1Loss(LossBase): | |||
"""L1损失函数""" | |||
""" | |||
别名::class:`fastNLP.L1Loss` :class:`fastNLP.core.losses.L1Loss` | |||
L1损失函数 | |||
:param pred: 参数映射表中 `pred` 的映射关系,None表示映射关系为 `pred` -> `pred` | |||
:param target: 参数映射表中 `target` 的映射关系,None表示映射关系为 `target` >`target` | |||
""" | |||
def __init__(self, pred=None, target=None): | |||
""" | |||
:param pred: 参数映射表中`pred`的映射关系,None表示映射关系为`pred`->`pred` | |||
:param target: 参数映射表中`target`的映射关系,None表示映射关系为`target`->`target` | |||
""" | |||
super(L1Loss, self).__init__() | |||
self._init_param_map(pred=pred, target=target) | |||
def get_loss(self, pred, target): | |||
return F.l1_loss(input=pred, target=target) | |||
class BCELoss(LossBase): | |||
"""二分类交叉熵损失函数""" | |||
""" | |||
别名::class:`fastNLP.BCELoss` :class:`fastNLP.core.losses.BCELoss` | |||
二分类交叉熵损失函数 | |||
:param pred: 参数映射表中`pred`的映射关系,None表示映射关系为`pred`->`pred` | |||
:param target: 参数映射表中`target`的映射关系,None表示映射关系为`target`->`target` | |||
""" | |||
def __init__(self, pred=None, target=None): | |||
""" | |||
:param pred: 参数映射表中`pred`的映射关系,None表示映射关系为`pred`->`pred` | |||
:param target: 参数映射表中`target`的映射关系,None表示映射关系为`target`->`target` | |||
""" | |||
super(BCELoss, self).__init__() | |||
self._init_param_map(pred=pred, target=target) | |||
def get_loss(self, pred, target): | |||
return F.binary_cross_entropy(input=pred, target=target) | |||
class NLLLoss(LossBase): | |||
"""负对数似然损失函数""" | |||
""" | |||
别名::class:`fastNLP.NLLLoss` :class:`fastNLP.core.losses.NLLLoss` | |||
负对数似然损失函数 | |||
:param pred: 参数映射表中`pred`的映射关系,None表示映射关系为`pred`->`pred` | |||
:param target: 参数映射表中`target`的映射关系,None表示映射关系为`target`->`target` | |||
""" | |||
def __init__(self, pred=None, target=None): | |||
""" | |||
:param pred: 参数映射表中`pred`的映射关系,None表示映射关系为`pred`->`pred` | |||
:param target: 参数映射表中`target`的映射关系,None表示映射关系为`target`->`target` | |||
""" | |||
super(NLLLoss, self).__init__() | |||
self._init_param_map(pred=pred, target=target) | |||
def get_loss(self, pred, target): | |||
return F.nll_loss(input=pred, target=target) | |||
class LossInForward(LossBase): | |||
""" | |||
Alias: :class:`fastNLP.LossInForward` :class:`fastNLP.core.losses.LossInForward`

Takes the loss from the dict returned by forward().

:param str loss_key: the key under which forward() stores the loss; defaults to 'loss'
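Example (a minimal usage sketch; assumes the model's forward() returns a dict containing the key 'loss')::

    >>> loss = LossInForward(loss_key='loss')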
""" | |||
def __init__(self, loss_key='loss'): | |||
""" | |||
:param str loss_key: 在forward函数中loss的键名,默认为loss | |||
""" | |||
super().__init__() | |||
if not isinstance(loss_key, str): | |||
raise TypeError(f"Only str allowed for loss_key, got {type(loss_key)}.") | |||
self.loss_key = loss_key | |||
def get_loss(self, **kwargs): | |||
if self.loss_key not in kwargs: | |||
check_res = _CheckRes( | |||
@@ -298,17 +313,17 @@ class LossInForward(LossBase): | |||
varargs=[]) | |||
raise _CheckError(check_res=check_res, func_signature=_get_func_signature(self.get_loss)) | |||
return kwargs[self.loss_key] | |||
def __call__(self, pred_dict, target_dict, check=False): | |||
loss = self.get_loss(**pred_dict) | |||
if not (isinstance(loss, torch.Tensor) and len(loss.size()) == 0): | |||
if not isinstance(loss, torch.Tensor): | |||
raise TypeError(f"Loss excepted to be a torch.Tensor, got {type(loss)}") | |||
loss = torch.sum(loss) / (loss.view(-1)).size(0) | |||
# raise RuntimeError(f"The size of loss is expected to be torch.Size([]), got {loss.size()}")
return loss | |||
@@ -378,13 +393,13 @@ def mask(predict, truth, **kwargs): | |||
if kwargs.get("mask") is None: | |||
return predict, truth | |||
mask = kwargs["mask"] | |||
predict, truth = squash(predict, truth) | |||
mask = mask.view(-1, ) | |||
predict = torch.masked_select(predict.permute(1, 0), mask).view(predict.size()[-1], -1).permute(1, 0) | |||
truth = torch.masked_select(truth, mask) | |||
return predict, truth | |||
@@ -399,4 +414,3 @@ def make_mask(lens, tar_len): | |||
mask = [torch.ge(lens, i + 1) for i in range(tar_len)] | |||
mask = torch.stack(mask, 1) | |||
return mask | |||
@@ -1,31 +1,25 @@ | |||
""" | |||
The metrics module implements the common evaluation metrics needed by fastNLP, generally passed as a parameter to :class:`~fastNLP.Trainer`.
""" | |||
import inspect | |||
from collections import defaultdict | |||
import numpy as np | |||
import torch | |||
from .utils import _CheckError | |||
from .utils import _CheckRes | |||
from .utils import _build_args | |||
from .utils import _check_arg_dict_list | |||
from .utils import _get_func_signature | |||
from .utils import seq_lens_to_masks | |||
from .vocabulary import Vocabulary | |||
class MetricBase(object): | |||
"""所有metrics的基类 | |||
所有的传入到Trainer, Tester的Metric需要继承自该对象。需要覆盖写入evaluate(), get_metric()方法。 | |||
""" | |||
所有metrics的基类,,所有的传入到Trainer, Tester的Metric需要继承自该对象,需要覆盖写入evaluate(), get_metric()方法。 | |||
evaluate(xxx)中传入的是一个batch的数据。 | |||
@@ -94,17 +88,17 @@ class MetricBase(object): | |||
return {'acc': acc} # must return a dict; the key is the metric's name, shown in the Trainer's progress bar

``MetricBase`` performs checks on the input dicts ``pred_dict`` and ``target_dict``.
``pred_dict`` is the return value of the model's ``forward()`` or ``predict()`` function.
``target_dict`` is the ground truth from the DataSet; a field counts as ground truth when its ``is_target`` is set to True.

``MetricBase`` performs the following type checks:

1. whether self.evaluate has varargs, which is not supported.
2. whether a parameter needed by self.evaluate is in neither ``pred_dict`` nor ``target_dict``.
3. whether a parameter needed by self.evaluate is in both ``pred_dict`` and ``target_dict``.

Besides this, before the parameters are passed into self.evaluate, this function detects the parameters in ``pred_dict`` and ``target_dict`` that are not used.
If kwargs is a parameter of self.evaluate, no check is performed.
@@ -267,13 +261,18 @@ class MetricBase(object): | |||
class AccuracyMetric(MetricBase): | |||
"""准确率Metric""" | |||
""" | |||
别名::class:`fastNLP.AccuracyMetric` :class:`fastNLP.core.metrics.AccuracyMetric` | |||
准确率Metric(其它的Metric参见 :doc:`fastNLP.core.metrics` ) | |||
:param pred: 参数映射表中 `pred` 的映射关系,None表示映射关系为 `pred` -> `pred` | |||
:param target: 参数映射表中 `target` 的映射关系,None表示映射关系为 `target` -> `target` | |||
:param seq_len: 参数映射表中 `seq_lens` 的映射关系,None表示映射关系为 `seq_len` -> `seq_len` | |||
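Example (a minimal usage sketch; the key names are illustrative)::

    >>> metric = AccuracyMetric(pred='output', target='label')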
""" | |||
def __init__(self, pred=None, target=None, seq_len=None): | |||
""" | |||
:param pred: 参数映射表中`pred`的映射关系,None表示映射关系为`pred`->`pred` | |||
:param target: 参数映射表中`target`的映射关系,None表示映射关系为`target`->`target` | |||
:param seq_len: 参数映射表中`seq_lens`的映射关系,None表示映射关系为`seq_len`->`seq_len` | |||
""" | |||
super().__init__() | |||
self._init_param_map(pred=pred, target=target, seq_len=seq_len) | |||
@@ -282,7 +281,8 @@ class AccuracyMetric(MetricBase): | |||
self.acc_count = 0 | |||
def evaluate(self, pred, target, seq_len=None): | |||
"""evaluate函数将针对一个批次的预测结果做评价指标的累计 | |||
""" | |||
evaluate函数将针对一个批次的预测结果做评价指标的累计 | |||
:param torch.Tensor pred: 预测的tensor, tensor的形状可以是torch.Size([B,]), torch.Size([B, n_classes]), | |||
torch.Size([B, max_len]), 或者torch.Size([B, max_len, n_classes]) | |||
@@ -327,7 +327,8 @@ class AccuracyMetric(MetricBase): | |||
self.total += np.prod(list(pred.size())) | |||
def get_metric(self, reset=True): | |||
"""get_metric函数将根据evaluate函数累计的评价指标统计量来计算最终的评价结果. | |||
""" | |||
get_metric函数将根据evaluate函数累计的评价指标统计量来计算最终的评价结果. | |||
:param bool reset: 在调用完get_metric后是否清空评价指标统计量. | |||
:return dict evaluate_result: {"acc": float} | |||
@@ -430,8 +431,6 @@ def _bio_tag_to_spans(tags, ignore_labels=None): | |||
class SpanFPreRecMetric(MetricBase): | |||
""" | |||
For sequence-labeling tasks, computes F, precision and recall in terms of spans.
For example, Chinese part-of-speech tagging labels at the character level: the sentence '中国在亚洲' may be tagged (in BMES) as
['B-NN', 'E-NN', 'S-DET', 'B-NN', 'E-NN']. This metric computes F1 for exactly such situations.
@@ -455,26 +454,24 @@ class SpanFPreRecMetric(MetricBase): | |||
... | |||
} | |||
:param tag_vocab: the label :class:`~fastNLP.Vocabulary`. Supported labels are "B" (no label) or "B-xxx" (xxx being some label, e.g. NN in POS tagging).
    During decoding, tags with the same xxx are treated as one label; e.g. ['B-NN', 'E-NN'] is merged into one 'NN'.
:param str pred: the key used to fetch the prediction from the dict passed to evaluate(). If None, 'pred' is used.
:param str target: the key used to fetch the target from the dict passed to evaluate(). If None, 'target' is used.
:param str seq_len: the key used to fetch the sequence-length data from the dict passed to evaluate(). If None, 'seq_lens' is used.
:param str encoding_type: currently supports bio and bmes
:param list ignore_labels: a list of str. Classes in this list are excluded from the computation; e.g. passing ['NN'] in POS tagging excludes the label 'NN'.
:param bool only_gross: whether to compute only the overall f1, precision and recall. If False, the per-label f1, precision and recall are returned as well.
:param str f_type: 'micro' or 'macro'. 'micro': first compute the overall TP, FN and FP counts, then f, precision and recall; 'macro':
    compute f, precision and recall per class, then average them (all classes' f weighted equally).
:param float beta: the f_beta score, f_beta = (1 + beta^2)*(pre*rec)/(beta^2*pre + rec). Common values are beta=0.5, 1, 2. With 0.5
    precision outweighs recall; with 1 they are weighted equally; with 2 recall outweighs precision.
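Example (a minimal usage sketch; assumes `tag_vocab` is a :class:`~fastNLP.Vocabulary` built over the tags)::

    >>> metric = SpanFPreRecMetric(tag_vocab=tag_vocab, encoding_type='bmes', only_gross=False)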
""" | |||
def __init__(self, tag_vocab, pred=None, target=None, seq_len=None, encoding_type='bio', ignore_labels=None, | |||
only_gross=True, f_type='micro', beta=1): | |||
""" | |||
:param Vocabulary tag_vocab: 标签的vocabulary。支持的标签为"B"(没有label);或"B-xxx"(xxx为某种label,比如POS中的NN), | |||
在解码时,会将相同xxx的认为是同一个label,比如['B-NN', 'E-NN']会被合并为一个'NN'. | |||
:param str pred: 用该key在evaluate()时从传入dict中取出prediction数据。 为None,则使用'pred'取数据 | |||
:param str target: 用该key在evaluate()时从传入dict中取出target数据。 为None,则使用'target'取数据 | |||
:param str seq_len: 用该key在evaluate()时从传入dict中取出sequence length数据。为None,则使用'seq_lens'取数据。 | |||
:param str encoding_type: 目前支持bio, bmes | |||
:param list ignore_labels: str 组成的list. 这个list中的class不会被用于计算。例如在POS tagging时传入['NN'],则不会计算'NN'这 | |||
个label | |||
:param bool only_gross: 是否只计算总的f1, precision, recall的值;如果为False,不仅返回总的f1, pre, rec, 还会返回每个 | |||
label的f1, pre, rec | |||
:param str f_type: 'micro'或'macro'. 'micro':通过先计算总体的TP,FN和FP的数量,再计算f, precision, recall; 'macro': | |||
分布计算每个类别的f, precision, recall,然后做平均(各类别f的权重相同) | |||
:param float beta: f_beta分数,f_beta = (1 + beta^2)*(pre*rec)/(beta^2*pre + rec). 常用为beta=0.5, 1, 2. 若为0.5 | |||
则精确率的权重高于召回率;若为1,则两者平等;若为2,则召回率权重高于精确率。 | |||
""" | |||
encoding_type = encoding_type.lower() | |||
if not isinstance(tag_vocab, Vocabulary): | |||
@@ -647,20 +644,18 @@ class BMESF1PreRecMetric(MetricBase): | |||
target is of shape (batch_size, max_len)
seq_lens is of shape (batch_size, )

You must declare which tag idx each of the four BMES tags corresponds to. Any number that is not b_idx, m_idx, e_idx or s_idx is treated as s_idx.

:param b_idx: int, the tag idx corresponding to the Begin tag.
:param m_idx: int, the tag idx corresponding to the Middle tag.
:param e_idx: int, the tag idx corresponding to the End tag.
:param s_idx: int, the tag idx corresponding to the Single tag.
:param pred: str, the key used to fetch the prediction from the dict passed to evaluate(). If None, 'pred' is used.
:param target: str, the key used to fetch the target from the dict passed to evaluate(). If None, 'target' is used.
:param seq_len: str, the key used to fetch the sequence-length data from the dict passed to evaluate(). If None, 'seq_len' is used.
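Example (a minimal usage sketch with the default tag indices)::

    >>> metric = BMESF1PreRecMetric(b_idx=0, m_idx=1, e_idx=2, s_idx=3)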
""" | |||
def __init__(self, b_idx=0, m_idx=1, e_idx=2, s_idx=3, pred=None, target=None, seq_len=None): | |||
""" | |||
需要申明BMES这四种tag中,各种tag对应的idx。所有不为b_idx, m_idx, e_idx, s_idx的数字都认为是s_idx。 | |||
:param b_idx: int, Begin标签所对应的tag idx. | |||
:param m_idx: int, Middle标签所对应的tag idx. | |||
:param e_idx: int, End标签所对应的tag idx. | |||
:param s_idx: int, Single标签所对应的tag idx | |||
:param pred: str, 用该key在evaluate()时从传入dict中取出prediction数据。 为None,则使用'pred'取数据 | |||
:param target: str, 用该key在evaluate()时从传入dict中取出target数据。 为None,则使用'target'取数据 | |||
:param seq_len: str, 用该key在evaluate()时从传入dict中取出seqence length数据。为None,则使用'seq_len'取数据。 | |||
""" | |||
super().__init__() | |||
self._init_param_map(pred=pred, target=target, seq_len=seq_len) | |||
@@ -831,21 +826,23 @@ def _pred_topk(y_prob, k=1): | |||
class SQuADMetric(MetricBase): | |||
"""SQuAD数据集metric | |||
""" | |||
SQuAD数据集metric | |||
:param pred1: 参数映射表中`pred1`的映射关系,None表示映射关系为`pred1`->`pred1` | |||
:param pred2: 参数映射表中`pred2`的映射关系,None表示映射关系为`pred2`->`pred2` | |||
:param target1: 参数映射表中`target1`的映射关系,None表示映射关系为`target1`->`target1` | |||
:param target2: 参数映射表中`target2`的映射关系,None表示映射关系为`target2`->`target2` | |||
:param float beta: f_beta分数,f_beta = (1 + beta^2)*(pre*rec)/(beta^2*pre + rec). 常用为beta=0.5, 1, 2. 若为0.5 | |||
则精确率的权重高于召回率;若为1,则两者平等;若为2,则召回率权重高于精确率。 | |||
:param bool right_open: right_open为true表示start跟end指针指向一个左闭右开区间,为false表示指向一个左闭右闭区间。 | |||
:param bool print_predict_stat: True则输出预测答案是否为空与正确答案是否为空的统计信息, False则不输出 | |||
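Example (a minimal usage sketch; the key names are illustrative)::

    >>> metric = SQuADMetric(pred1='start_pred', pred2='end_pred', target1='start_target', target2='end_target')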
""" | |||
def __init__(self, pred1=None, pred2=None, target1=None, target2=None, | |||
beta=1, right_open=True, print_predict_stat=False): | |||
""" | |||
:param pred1: 参数映射表中`pred1`的映射关系,None表示映射关系为`pred1`->`pred1` | |||
:param pred2: 参数映射表中`pred2`的映射关系,None表示映射关系为`pred2`->`pred2` | |||
:param target1: 参数映射表中`target1`的映射关系,None表示映射关系为`target1`->`target1` | |||
:param target2: 参数映射表中`target2`的映射关系,None表示映射关系为`target2`->`target2` | |||
:param float beta: f_beta分数,f_beta = (1 + beta^2)*(pre*rec)/(beta^2*pre + rec). 常用为beta=0.5, 1, 2. 若为0.5 | |||
则精确率的权重高于召回率;若为1,则两者平等;若为2,则召回率权重高于精确率。 | |||
:param bool right_open: right_open为true表示start跟end指针指向一个左闭右开区间,为false表示指向一个左闭右闭区间。 | |||
:param bool print_predict_stat: True则输出预测答案是否为空与正确答案是否为空的统计信息, False则不输出 | |||
""" | |||
super(SQuADMetric, self).__init__() | |||
self._init_param_map(pred1=pred1, pred2=pred2, target1=target1, target2=target2) | |||
@@ -1,11 +1,16 @@ | |||
""" | |||
The optimizer module defines the various optimizers needed by fastNLP, generally passed as a parameter to :class:`~fastNLP.Trainer`.
""" | |||
import torch | |||
class Optimizer(object): | |||
""" | |||
Alias: :class:`fastNLP.Optimizer` :class:`fastNLP.core.optimizer.Optimizer`

:param model_params: a generator. E.g. ``model.parameters()`` for PyTorch models.
:param kwargs: additional parameters.
""" | |||
def __init__(self, model_params, **kwargs): | |||
if model_params is not None and not hasattr(model_params, "__next__"): | |||
@@ -26,10 +31,11 @@ class Optimizer(object): | |||
class SGD(Optimizer): | |||
""" | |||
Alias: :class:`fastNLP.SGD` :class:`fastNLP.core.optimizer.SGD`

:param float lr: learning rate. Default: 0.001
:param float momentum: momentum. Default: 0
:param model_params: a generator. E.g. ``model.parameters()`` for PyTorch models.
""" | |||
def __init__(self, lr=0.001, momentum=0, model_params=None): | |||
@@ -47,10 +53,11 @@ class SGD(Optimizer): | |||
class Adam(Optimizer): | |||
""" | |||
Alias: :class:`fastNLP.Adam` :class:`fastNLP.core.optimizer.Adam`

:param float lr: learning rate. Default: 0.001
:param float weight_decay: weight decay (L2 penalty). Default: 0
:param model_params: a generator. E.g. ``model.parameters()`` for PyTorch models.
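Example (a minimal usage sketch; `model` is assumed to be a torch.nn.Module)::

    >>> optimizer = Adam(lr=0.001, model_params=model.parameters())
    >>> trainer = Trainer(train_data, model, optimizer=optimizer)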
""" | |||
def __init__(self, lr=0.001, weight_decay=0, betas=(0.9, 0.999), eps=1e-8, amsgrad=False, model_params=None): | |||
@@ -2,10 +2,10 @@ from collections import defaultdict | |||
import torch | |||
from . import Batch | |||
from . import DataSet | |||
from . import SequentialSampler | |||
from .utils import _build_args | |||
class Predictor(object): | |||
@@ -1,22 +1,24 @@ | |||
""" | |||
The sampler module implements the various samplers needed by fastNLP.
""" | |||
__all__ = ["Sampler", "BucketSampler", "SequentialSampler", "RandomSampler"] | |||
from itertools import chain | |||
import numpy as np | |||
import torch | |||
class Sampler(object): | |||
""" `Sampler` 类的基类. 规定以何种顺序取出data中的元素 | |||
""" | |||
别名::class:`fastNLP.Sampler` :class:`fastNLP.core.sampler.Sampler` | |||
`Sampler` 类的基类. 规定以何种顺序取出data中的元素 | |||
子类必须实现 ``__call__`` 方法. 输入 `DataSet` 对象, 返回其中元素的下标序列 | |||
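A minimal sketch of a custom sampler (illustrative only, not part of fastNLP)::

    class ReverseSampler(Sampler):
        # draw the elements in reverse order
        def __call__(self, data_set):
            return list(range(len(data_set) - 1, -1, -1))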
""" | |||
def __call__(self, data_set): | |||
""" | |||
:param DataSet data_set: the `DataSet` object to sample from
@@ -26,56 +28,62 @@ class Sampler(object): | |||
class SequentialSampler(Sampler): | |||
"""顺序取出元素的 `Sampler` | |||
.. _SequentialSampler: | |||
""" | |||
别名::class:`fastNLP.SequentialSampler` :class:`fastNLP.core.sampler.SequentialSampler` | |||
顺序取出元素的 `Sampler` | |||
""" | |||
def __call__(self, data_set): | |||
return list(range(len(data_set))) | |||
class RandomSampler(Sampler): | |||
""" | |||
Alias: :class:`fastNLP.RandomSampler` :class:`fastNLP.core.sampler.RandomSampler`

A `Sampler` that draws elements in random order.
""" | |||
def __call__(self, data_set): | |||
return list(np.random.permutation(len(data_set))) | |||
class BucketSampler(Sampler): | |||
"""带Bucket的 `Random Sampler`. 可以随机地取出长度相似的元素 | |||
""" | |||
别名::class:`fastNLP.BucketSampler` :class:`fastNLP.core.sampler.BucketSampler` | |||
带Bucket的 `Random Sampler`. 可以随机地取出长度相似的元素 | |||
:param int num_buckets: bucket的数量 | |||
:param int batch_size: batch的大小 | |||
:param str seq_lens_field_name: 对应序列长度的 `field` 的名字 | |||
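Example (a minimal usage sketch; assumes the DataSet has a 'seq_len' field)::

    >>> sampler = BucketSampler(num_buckets=10, batch_size=32, seq_lens_field_name='seq_len')
    >>> trainer = Trainer(train_data, model, sampler=sampler)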
""" | |||
def __init__(self, num_buckets=10, batch_size=32, seq_lens_field_name='seq_len'): | |||
self.num_buckets = num_buckets | |||
self.batch_size = batch_size | |||
self.seq_lens_field_name = seq_lens_field_name | |||
def __call__(self, data_set): | |||
seq_lens = data_set.get_all_fields()[self.seq_lens_field_name].content | |||
total_sample_num = len(seq_lens) | |||
bucket_indexes = [] | |||
assert total_sample_num >= self.num_buckets, "The number of samples is smaller than the number of buckets."
num_sample_per_bucket = total_sample_num // self.num_buckets | |||
for i in range(self.num_buckets): | |||
bucket_indexes.append([num_sample_per_bucket * i, num_sample_per_bucket * (i + 1)]) | |||
bucket_indexes[-1][1] = total_sample_num | |||
sorted_seq_lens = list(sorted([(idx, seq_len) for | |||
idx, seq_len in zip(range(total_sample_num), seq_lens)], | |||
key=lambda x: x[1])) | |||
batchs = [] | |||
left_init_indexes = [] | |||
for b_idx in range(self.num_buckets): | |||
start_idx = bucket_indexes[b_idx][0] | |||
@@ -90,7 +98,7 @@ class BucketSampler(Sampler): | |||
if len(left_init_indexes) != 0:
batchs.append(left_init_indexes) | |||
np.random.shuffle(batchs) | |||
return list(chain(*batchs)) | |||
@@ -128,10 +136,10 @@ def k_means_1d(x, k, max_iter=100): | |||
if len(sorted_x) < k: | |||
raise ValueError("too few buckets") | |||
gap = len(sorted_x) / k | |||
centroids = np.array([sorted_x[int(x * gap)] for x in range(k)]) | |||
assign = None | |||
for i in range(max_iter): | |||
# Cluster Assignment step | |||
assign = np.array([np.argmin([np.absolute(x_i - x) for x in centroids]) for x_i in x]) | |||
@@ -163,7 +171,7 @@ def k_means_bucketing(lengths, buckets): | |||
bucket_data = [[] for _ in buckets] | |||
num_buckets = len(buckets) | |||
_, assignments = k_means_1d(lengths, num_buckets) | |||
for idx, bucket_id in enumerate(assignments): | |||
if buckets[bucket_id] is None or lengths[idx] <= buckets[bucket_id]: | |||
bucket_data[bucket_id].append(idx) | |||
@@ -1,81 +1,81 @@ | |||
"""
The tester module implements the Tester class needed by fastNLP: given data, a model and metrics, it runs a performance test.

Example::

    import numpy as np
    import torch
    from torch import nn
    from fastNLP import Tester
    from fastNLP import DataSet
    from fastNLP import AccuracyMetric

    class Model(nn.Module):
        def __init__(self):
            super().__init__()
            self.fc = nn.Linear(1, 1)
        def forward(self, a):
            return {'pred': self.fc(a.unsqueeze(1)).squeeze(1)}

    model = Model()

    dataset = DataSet({'a': np.arange(10, dtype=float), 'b': np.arange(10, dtype=float)*2})
    dataset.set_input('a')
    dataset.set_target('b')

    tester = Tester(dataset, model, metrics=AccuracyMetric())
    eval_results = tester.test()

The Metric mapping rules here are the same as in :class:`fastNLP.Trainer`; for concrete usage see part 1.3 of the :doc:`trainer module<fastNLP.core.trainer>`.
"""
import torch
from torch import nn

from .batch import Batch
from .dataset import DataSet
from .metrics import _prepare_metrics
from .sampler import SequentialSampler
from .utils import _CheckError
from .utils import _build_args
from .utils import _check_loss_evaluate
from .utils import _move_dict_value_to_device
from .utils import _get_func_signature
from .utils import _get_model_device
from .utils import _move_model_to_device


class Tester(object):
"""
Alias: :class:`fastNLP.Tester` :class:`fastNLP.core.tester.Tester`

Tester runs a performance test given data, a model and metrics; pass in the model, the data and the metrics to run validation.

:param data: the dataset to test on, of type :class:`~fastNLP.DataSet`
:param torch.nn.module model: the model to use
:param metrics: a :class:`~fastNLP.core.metrics.MetricBase`, or a list of :class:`~fastNLP.core.metrics.MetricBase`
:param int batch_size: the batch_size to use during evaluation.
:param str,int,torch.device,list(int) device: the device to load the model onto. Defaults to None, i.e. the Tester does not manage where the model computes. The following inputs are supported:

    1. str: ['cpu', 'cuda', 'cuda:0', 'cuda:1', ...], meaning, in order, the CPU, the first visible GPU, the first visible GPU, the second visible GPU;

    2. torch.device: load the model onto the torch.device.

    3. int: train on the GPU whose device_id is this value.

    4. list(int): if more than one device is given, wrap the model with torch.nn.DataParallel and use the given devices.

    5. None: do nothing to the model. If the model passed in is a torch.nn.DataParallel, this value must be None.

:param int verbose: if 0, output nothing; if 1, print the validation result.
"""
def __init__(self, data, model, metrics, batch_size=16, device=None, verbose=1): | |||
super(Tester, self).__init__() | |||
if not isinstance(data, DataSet): | |||
@@ -103,7 +103,7 @@ class Tester(object): | |||
def test(self): | |||
"""开始进行验证,并返回验证结果。 | |||
:return dict(dict) eval_results: dict为二层嵌套结构,dict的第一层是metric的名称; 第二层是这个metric的指标。 | |||
:return Dict[Dict] : dict的二层嵌套结构,dict的第一层是metric的名称; 第二层是这个metric的指标。 | |||
一个AccuracyMetric的例子为{'AccuracyMetric': {'acc': 1.0}}。 | |||
""" | |||
# turn on the testing mode; clean up the history | |||
@@ -1,13 +1,17 @@ | |||
""" | |||
Trainer organizes the training loop of a single task in fastNLP, sparing users from re-writing the code for the following steps in every training task:

(1) the epoch loop;
(2) splitting the data into batches;
(3) padding each batch;
(4) validating on the dev set at the end of each epoch or after a given number of steps;
(5) saving the model whenever it achieves a better validation score.

1 Basic usage of Trainer

The example below uses a neural network to predict whether a sequence contains an even number of 1s.
Example:: | |||
@@ -20,8 +24,8 @@ Trainer在fastNLP中用于组织单任务的训练过程,可以避免用户在 | |||
from fastNLP import DataSet | |||
from fastNLP import Trainer | |||
from fastNLP import CrossEntropyLoss | |||
from fastNLP import AccuracyMetric | |||
from fastNLP.modules.decoder import MLP | |||
# the model
@@ -56,208 +60,214 @@ Trainer在fastNLP中用于组织单任务的训练过程,可以避免用户在 | |||
As the example above shows, using Trainer greatly reduces the amount of training code.

Using Trainer requires the following conditions to hold:

1.1 Model

1 The parameter names of the model's forward() must match the names in the DataSet. fastNLP passes the DataSet's data to forward() by matching names, so in the example above, if Model's forward were changed to forward(self, data), the field 'x' in the DataSet would have to be renamed to 'data'.

2 The parameters passed to forward() are the fields of the DataSet that are set as input; however, a field is only passed if forward() has a matching parameter. E.g. if 'x1' and 'x2' are both input but the model's function is forward(self, x1), then 'x2' is not passed to forward().

3 The model's forward() must return a dict.
1.2 Loss

To avoid restricting how many things forward() may return (complex tasks need to return several, e.g. dependency parsing), both :mod:`Loss<fastNLP.core.losses>` and :mod:`Metric<fastNLP.core.metrics>` match the content they need by name. In the example above,

Example::

    trainer = Trainer(tr_dataset, model, loss=CrossEntropyLoss(target='label'),
                      optimizer=SGD(model.parameters(), lr=0.1), n_epochs=1000,
                      dev_data=dev_data, metrics=AccuracyMetric(target='label'))

loss is set to :class:`~fastNLP.CrossEntropyLoss`, initialized with the parameter target='label'.
The initialization parameters of :class:`~fastNLP.CrossEntropyLoss` are (pred=None, target=None, padding_idx=-100);
the first two are the model prediction and the ground truth needed to compute the cross entropy.
`pred` usually comes from the dict returned by the model's forward(), and `target` usually from a field of the DataSet that is set as target.
Since people name their ground truth and model return values differently, fastNLP's :mod:`Loss<fastNLP.core.losses>` provides a mapping-like mechanism to match the corresponding values:
here :class:`~fastNLP.CrossEntropyLoss` will look for the value named 'label' to use as the ground truth;
and since pred=None, :class:`~fastNLP.CrossEntropyLoss` matches the prediction under the name 'pred',
which is exactly what forward() returns, so pred does not need to be specified.

Although the mapping mechanism makes loss computation flexible, in some cases the loss must be computed inside the model, e.g. models using a CRF.
For this fastNLP provides :class:`~fastNLP.LossInForward`, which simply takes the tensor stored under loss_key (it looks for 'loss' by default) in the dict returned by forward() and uses it as the loss.
If no loss is given when initializing Trainer, :class:`~fastNLP.LossInForward` is used by default. (TODO: add a detailed example here.)
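A minimal sketch of this pattern (the model, layer sizes and field names here are hypothetical, for illustration only)::

    import torch.nn.functional as F
    from torch import nn
    from fastNLP import Trainer, LossInForward

    class CRFLikeModel(nn.Module):
        def __init__(self):
            super().__init__()
            self.fc = nn.Linear(10, 2)
        def forward(self, x, y):
            pred = self.fc(x)
            loss = F.cross_entropy(pred, y)   # the loss is computed inside forward()
            return {'loss': loss}             # LossInForward picks it up under loss_key='loss'

    trainer = Trainer(train_data, CRFLikeModel(), loss=LossInForward())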
1.3 Metric

:mod:`Metric<fastNLP.core.metrics>` uses the same name-matching strategy as the Loss above; AccuracyMetric(target='label') behaves exactly like the CrossEntropyLoss case.

During validation, the computation may differ from forward() and the prediction cannot be read directly from forward()'s result. In that case the model can provide a predict() method; if it does, predict() is called instead during evaluation to obtain the prediction. The parameters passed to predict() are also selected from the fields of the DataSet that are set as input, and, like forward(), it must return a dict. (TODO: add a concrete example here.)
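A minimal sketch of a model with a separate predict() (hypothetical, for illustration only; assumes the imports from the previous sketch)::

    class ModelWithPredict(nn.Module):
        def __init__(self):
            super().__init__()
            self.fc = nn.Linear(10, 2)
        def forward(self, x, y):
            loss = F.cross_entropy(self.fc(x), y)
            return {'loss': loss}
        def predict(self, x):  # called instead of forward() during evaluation
            return {'pred': self.fc(x).argmax(dim=-1)}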
2 Trainer's code check

Because fastNLP relies on a mapping mechanism, mismatches can always occur. Trainer therefore provides a mapping-check mechanism, controlled through check_code_level. The examples below show the errors reported for various causes.

Example2.1
::

    import numpy as np
    from torch import nn
    import torch
    from torch.optim import SGD
    from fastNLP import Trainer
    from fastNLP import DataSet

    class Model(nn.Module):
        def __init__(self):
            super().__init__()
            self.fc = nn.Linear(1, 1)
        def forward(self, x, b):
            loss = torch.mean((self.fc(x)-b)**2)
            return {'loss': loss}

    model = Model()

    dataset = DataSet({'a': np.arange(10), 'b': np.arange(10)*2})
    dataset.set_input('a', 'b')

    trainer = Trainer(dataset, model, loss=None, optimizer=SGD(model.parameters(), lr=0.001))

    trainer = Trainer(dataset, model, SGD(model.parameters()))
    # the following error is reported
    # input fields after batch(if batch size is 2):
    # a: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2])
    # b: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2])
    # There is no target field.
    # ....
    # NameError:
    # Problems occurred when calling Model.forward(self, x, b)
    # missing param: ['x']
    # unused field: ['a']
    # Suggestion: You need to provide ['x'] in DataSet and set it as input.

This happens because, during initialization, fastNLP tries to run forward() and backward() once with a batch of batch_size=2. Two kinds of information in the message can help you:

1 'input fields after batch...' shows the type and shape of every field of the train dataset after the Batch operation. (No targets are shown here because the train dataset has none.) Use this to check whether the required content was correctly set as input or target.

2 NameError happens when the mapping fails. Here the error is raised while attempting the forward computation (recognizable from Model.forward(self, x, b)): the 'x' required by forward() was not found. The message also points out that 'x' is missing while 'a' is unused, so the field name is probably wrong. Renaming the field 'a' in the dataset to 'x', or renaming the model parameter 'x' to 'a', both solve the problem.
The next example fails because the value needed to compute the loss cannot be found.

Example2.2
::
import numpy as np | |||
from torch import nn | |||
from torch.optim import SGD | |||
from fastNLP import Trainer | |||
from fastNLP import DataSet | |||
from fastNLP import L1Loss | |||
import torch | |||
class Model(nn.Module): | |||
def __init__(self): | |||
super().__init__() | |||
self.fc = nn.Linear(1, 1) | |||
def forward(self, a): | |||
return {'pred_b': self.fc(a.unsqueeze(1)).squeeze(1), 'No use':1} | |||
model = Model() | |||
dataset = DataSet({'a': np.arange(10, dtype=float), 'b':np.arange(10, dtype=float)*2}) | |||
dataset.set_input('a') | |||
dataset.set_target('b') | |||
trainer = Trainer(dataset, model, loss=L1Loss(target='label'), optimizer=SGD(model.parameters(), lr=0.001)) | |||
# the following error message is printed
# input fields after batch(if batch size is 2): | |||
# a: (1)type:torch.Tensor (2)dtype:torch.float32, (3)shape:torch.Size([2]) | |||
# target fields after batch(if batch size is 2): | |||
# b: (1)type:torch.Tensor (2)dtype:torch.float32, (3)shape:torch.Size([2]) | |||
# .... | |||
# NameError: | |||
# Problems occurred when calling L1Loss.get_loss(self, pred, target) | |||
# missing param: ['pred(assign to `pred` in `L1Loss`)', 'label(assign to `target` in `L1Loss`)'] | |||
# unused field: ['b'] | |||
# unused param: ['pred_b', 'No use'] | |||
# target field: ['b'] | |||
# param from Model.forward(self, a): ['pred_b', 'No use'] | |||
# Suggestion: (1). Check key assignment for `target` when initialize L1Loss. Or provide `label` in DataSet or output of Model.forward(self, a). | |||
# (2). Check key assignment for `pred` when initialize L1Loss. Or provide `pred` in DataSet or output of Model.forward(self, a). | |||
The error message again contains two parts:

1 The first part is the same as above.

2 Here the error is raised while computing the loss (recognizable from L1Loss.get_loss(self, pred, target)): neither `pred` nor `label` (we set target to 'label' when initializing L1Loss) was found. 'unused field' lists fields that exist in the DataSet but were not set as input or target; 'unused param' lists values returned by forward() that were never used; 'target field' lists the fields set as target; 'param from Model.forward(self, a)' lists all keys returned by forward(). "Suggestion" gives advice on handling the current error.

Note that in some cases, e.g. when forward() returns exactly one value and there is exactly one target, fastNLP skips the matching and directly uses forward()'s result as pred and the DataSet's target as target. The 'No use' entry in the return value above exists only to force the Loss to perform matching.
The next example shows the error raised when something goes wrong while a dev dataset is present.

Example2.3
::
import numpy as np | |||
from torch import nn | |||
from torch.optim import SGD | |||
from fastNLP import Trainer | |||
from fastNLP import DataSet | |||
from fastNLP import AccuracyMetric | |||
import torch | |||
class Model(nn.Module): | |||
def __init__(self): | |||
super().__init__() | |||
self.fc = nn.Linear(1, 1) | |||
def forward(self, a, b): | |||
loss = torch.mean((self.fc(a.float().unsqueeze(1))-b.float())**2) | |||
return {'loss': loss} | |||
def predict(self, a):  # predict() is used for evaluation
return {'output': self.fc(a.float().unsqueeze(1))}  # the returned dict has no 'pred' key
model = Model() | |||
dataset = DataSet({'a': np.arange(10), 'b':np.arange(10)*2}) | |||
dev_data = DataSet({'a': np.arange(10, 20), 'b':np.arange(10, 20)*2}) | |||
dataset.set_input('a', 'b') | |||
dev_data.set_input('a')  # note: no target is set here
trainer = Trainer(dataset, model, loss=None, optimizer=SGD(model.parameters(), lr=0.001), | |||
dev_data=dev_data, metrics=AccuracyMetric()) | |||
# the error message
# ... | |||
# NameError: | |||
# Problems occurred when calling AccuracyMetric.evaluate(self, pred, target, seq_len=None) | |||
# missing param: ['pred(assign to `pred` in `AccuracyMetric`)', 'target(assign to `target` in `AccuracyMetric`)'] | |||
# unused param: ['output'] | |||
# target field: [] | |||
# param from Model.predict(self, a): ['output'] | |||
# Suggestion: (1). Check key assignment for `pred` when initialize AccuracyMetric. Or provide `pred` in DataSet or output of Model.predict(self, a). | |||
# (2). Check key assignment for `target` when initialize AccuracyMetric. Or provide `target` in DataSet or output of Model.predict(self, a). | |||
The error message is similar to the previous ones, but 'AccuracyMetric.evaluate(self, pred, target, seq_len=None)' shows that it occurred during evaluation. This spares you from training a whole epoch before discovering that evaluation is misconfigured. The fix is to tell the metric, at initialization, to fetch `pred` from 'output': AccuracyMetric(pred='output').

The strictness of the check can be adjusted through check_code_level. The default is 0, i.e. the check is performed.
3 Trainer and callbacks

Although Trainer already integrates quite a bit of functionality, it cannot cover everything a training run may need, such as negative sampling, learning rate decay or early stopping. To solve this, fastNLP introduces a callback mechanism: a :class:`~fastNLP.Callback` is a collection of functions that run at specific stages of the Trainer's training loop. Every :class:`~fastNLP.Callback` has on_* methods (e.g. on_train_start, on_backward_begin). If a Callback implements such a method, the Trainer calls it when it reaches the corresponding stage.

We divide the inside of Trainer.train() into the following stages; at each stage the corresponding callbacks are triggered.
@@ -286,12 +296,11 @@ Trainer在fastNLP中用于组织单任务的训练过程,可以避免用户在 | |||
callback.on_train_end()  # training ends
callback.on_exception()  # a special step: if an exception is raised during training, control jumps here
fastNLP already ships with many callbacks ready to use; see :class:`~fastNLP.Callback`.
TODO: add some examples of using callbacks.
""" | |||
import os | |||
import time | |||
from datetime import datetime | |||
@@ -300,32 +309,91 @@ from datetime import timedelta | |||
import numpy as np | |||
import torch | |||
from torch import nn | |||
import warnings | |||
try: | |||
from tqdm.autonotebook import tqdm | |||
except ImportError:
from .utils import _pseudo_tqdm as tqdm | |||
from .batch import Batch | |||
from .callback import CallbackManager, CallbackException | |||
from .dataset import DataSet | |||
from .losses import _prepare_losser | |||
from .metrics import _prepare_metrics | |||
from .sampler import Sampler | |||
from .sampler import RandomSampler | |||
from .sampler import SequentialSampler | |||
from .tester import Tester | |||
from .utils import _CheckError | |||
from .utils import _build_args | |||
from .utils import _check_forward_error | |||
from .utils import _check_loss_evaluate | |||
from .utils import _move_dict_value_to_device | |||
from .utils import _get_func_signature | |||
from .utils import _get_model_device | |||
from .optimizer import Optimizer | |||
from .utils import _move_model_to_device | |||
class Trainer(object): | |||
""" | |||
Alias: :class:`fastNLP.Trainer` :class:`fastNLP.core.trainer.Trainer`
In fastNLP, Trainer organizes the training loop of a single task, saving users from rewriting, for every new task:
(1) the epoch loop;
(2) splitting the data into batches;
(3) padding each batch;
(4) validating on the dev set at the end of each epoch or after a given number of steps;
(5) saving the model whenever validation performance improves; and so on.
See :doc:`fastNLP.core.trainer` for a detailed introduction.
:param train_data: the training set, of type :class:`~fastNLP.DataSet`.
:param nn.Module model: the model to train
:param torch.optim.Optimizer optimizer: the optimizer. If None, Trainer falls back to the default Adam(model.parameters(), lr=4e-3)
:param int batch_size: the batch size used for both training and validation.
:param loss: the :class:`~fastNLP.core.losses.LossBase` object to use. If None, defaults to :class:`~fastNLP.LossInForward`
:param sampler: controls the order in which batches are generated, of type :class:`~fastNLP.Sampler`. If None, defaults to :class:`~fastNLP.RandomSampler`
:param update_every: int, number of steps between gradient updates. Meant for gradient accumulation: e.g. when an effective
    batch size of 128 is needed but setting batch_size=128 directly runs out of memory, set batch_size=32 and update_every=4
    instead (see the construction sketch at the end of this docstring). Has no effect when optimizer is None.
:param int n_epochs: the number of epochs to train for.
:param int print_every: update the loss shown by tqdm every this many backward passes; if use_tqdm=False, print the loss instead.
:param dev_data: the DataSet used for validation, of type :class:`~fastNLP.DataSet`.
:param metrics: the evaluation metric(s) for validation. Either a single :class:`Metric<fastNLP.core.metrics.MetricBase>` ,
    or several :class:`Metric<fastNLP.core.metrics.MetricBase>` passed as a list.
    If validation yields a better result (with multiple metrics, the first in the list decides) and save_path is not None,
    the current model is saved. See :doc:`the metrics module <fastNLP.core.metrics>` for available metrics. Only effective when dev_data is given.
:param str,None metric_key: a :class:`Metric<fastNLP.core.metrics.MetricBase>` may report several values,
    e.g. :class:`~fastNLP.core.metrics.SpanFPreRecMetric` reports 'f', 'pre' and 'rec'; metric_key picks the one
    that decides. Some metrics are better when smaller, e.g. language-model perplexity; prefix the key with '-' to
    mark that, during validation, lower is better (e.g. "-ppl"). Only effective when dev_data is given.
:param int validate_every: validate on the dev set every this many steps; if -1, validate at the end of every epoch. Only effective when dev_data is given.
:param str,None save_path: the path to save the model to. If None, the model is not saved. If dev_data is None, the model of the last
    iteration is saved. Saving stores the model structure as well as the parameters; even under DataParallel, only the model itself is saved.
:param prefetch: bool, whether to produce batch data in an extra process. In theory this makes batch iteration faster.
:param bool use_tqdm: whether to show training progress with tqdm; if False, losses are printed to the terminal instead.
:param str,int,torch.device,list(int) device: the device to load the model onto. Defaults to None, i.e. the Trainer does not
    manage where the model computes. Supported inputs:
    1. str: ['cpu', 'cuda', 'cuda:0', 'cuda:1', ...] — respectively the CPU, the first visible GPU, the first visible GPU again
    ('cuda' and 'cuda:0' are the same device), and the second visible GPU;
    2. torch.device: load the model onto that torch.device.
    3. int: train on the GPU whose device_id equals this value
    4. list(int): if more than one device is given, wrap the model in torch.nn.DataParallel over the given devices.
    5. None: leave the model untouched; if the model passed in is a torch.nn.DataParallel, this value must be None.
:param list(callbacks) callbacks: callbacks that hook into the training loop; early stopping, negative sampling and the like
    can be implemented through the callback mechanism. See :doc:`the callback module <fastNLP.core.callback>` for available callbacks
:param int check_code_level: the model-check level. -1: no check; 0: stop only on errors; 1: warn if any field is unused;
    2: raise if any field is unused. The check runs the code on a very small batch (2 samples by default); in theory it
    modifies no parameters and only verifies that the code runs. However, if (1) the model hard-codes batch_size to some fixed value, or
    (2) the model accumulates a forward-pass counter, the check may run one extra computation. In those cases set check_code_level to -1.
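A minimal construction sketch (ours, with hypothetical ``train_data``/``dev_data``/``model``/``metric`` objects) tying
these parameters together; it shows gradient accumulation via update_every and a lower-is-better metric_key:

Example::

    trainer = Trainer(train_data=train_data, model=model,
                      batch_size=32, update_every=4,  # effective batch size: 32 * 4 = 128
                      n_epochs=10,
                      dev_data=dev_data, metrics=metric,
                      metric_key="-ppl",              # '-' marks that smaller is better
                      save_path="./models")
    trainer.train()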
""" | |||
def __init__(self, train_data, model, optimizer=None, loss=None, | |||
batch_size=32, sampler=None, update_every=1, | |||
n_epochs=10, print_every=5, | |||
@@ -334,74 +402,30 @@ class Trainer(object): | |||
prefetch=False, use_tqdm=True, device=None, | |||
callbacks=None, | |||
check_code_level=0): | |||
""" | |||
:param DataSet train_data: the training set
:param nn.Module model: the model to train
:param torch.optim.Optimizer,None optimizer: the optimizer. If None, Trainer falls back to the default Adam(model.parameters(), lr=4e-3)
:param int batch_size: the batch size used for both training and validation.
:param LossBase loss: the Loss object to use; see LossBase_ . If None, defaults to LossInForward_ .
:param Sampler sampler: controls the order in which batches are generated; see Sampler_ . If None, defaults to RandomSampler_ .
:param update_every: int, number of steps between gradient updates. Meant for gradient accumulation: e.g. when an effective
    batch size of 128 is needed but setting batch_size=128 directly runs out of memory, set batch_size=32 and update_every=4
    instead. Has no effect when optimizer is None.
:param int n_epochs: the number of epochs to train for.
:param int print_every: update the loss shown by tqdm every this many backward passes; if use_tqdm=False, print the loss instead.
:param DataSet dev_data: the DataSet used for validation.
:param MetricBase,list(MetricBase) metrics: the evaluation metric(s) for validation. Either a single Metric or several passed
    as a list. If validation yields a better result (with multiple metrics, the first in the list decides) and save_path is not None,
    the current model is saved. See Metric_ for available metrics. Only effective when dev_data is given.
:param str,None metric_key: a Metric_ may report several values, e.g. SpanFPreRecMetric_ reports 'f', 'pre' and 'rec'; metric_key
    picks the one that decides. Some metrics are better when smaller, e.g. language-model perplexity; prefix the key with '-' to
    mark that, during validation, lower is better (e.g. "-ppl"). Only effective when dev_data is given.
:param int validate_every: validate on the dev set every this many steps; if -1, validate at the end of every epoch. Only
    effective when dev_data is given.
:param str,None save_path: the path to save the model to. If None, the model is not saved. If dev_data is None, the model of
    the last iteration is saved. Saving stores the model structure as well as the parameters; even under DataParallel, only the model itself is saved.
:param prefetch: bool, whether to produce batch data in an extra process. In theory this makes batch iteration faster.
:param bool use_tqdm: whether to show training progress with tqdm; if False, losses are printed to the terminal instead.
:param str,int,torch.device,list(int) device: the device to load the model onto. Defaults to None, i.e. the Trainer does not
    manage where the model computes. Supported inputs:
    1. str: ['cpu', 'cuda', 'cuda:0', 'cuda:1', ...] — respectively the CPU, the first visible GPU, the first visible GPU again
    ('cuda' and 'cuda:0' are the same device), and the second visible GPU;
    2. torch.device: load the model onto that torch.device.
    3. int: train on the GPU with that device id
    4. list(int): if more than one device is given, wrap the model in torch.nn.DataParallel over the given devices.
    5. None: leave the model untouched; if the model passed in is a torch.nn.DataParallel, this value must be None.
:param list(callbacks) callbacks: callbacks that hook into the training loop; early stopping, negative sampling and the like
    can be implemented through the callback mechanism. See Callback_ for available callbacks.
:param int check_code_level: the model-check level. -1: no check; 0: stop only on errors; 1: warn if any field is unused;
    2: raise if any field is unused. The check runs the code on a very small batch (2 samples by default); in theory it
    modifies no parameters and only verifies that the code runs. However, if (1) the model hard-codes batch_size to some fixed value, or
    (2) the model accumulates a forward-pass counter, the check may run one extra computation. In those cases set check_code_level to -1.
""" | |||
super(Trainer, self).__init__() | |||
if not isinstance(train_data, DataSet): | |||
raise TypeError(f"The type of train_data must be fastNLP.DataSet, got {type(train_data)}.") | |||
if not isinstance(model, nn.Module): | |||
raise TypeError(f"The type of model must be torch.nn.Module, got {type(model)}.") | |||
# check metrics and dev_data | |||
if (not metrics) and dev_data is not None: | |||
raise ValueError("No metric for dev_data evaluation.") | |||
if metrics and (dev_data is None): | |||
raise ValueError("No dev_data for evaluations, pass dev_data or set metrics to None. ") | |||
# check update every | |||
assert update_every >= 1, "update_every must be no less than 1." | |||
self.update_every = int(update_every) | |||
# check save_path | |||
if not (save_path is None or isinstance(save_path, str)): | |||
raise ValueError("save_path can only be None or `str`.") | |||
# prepare evaluate | |||
metrics = _prepare_metrics(metrics) | |||
# parse metric_key | |||
# increase_better is True. It means the exp result gets better if the indicator increases. | |||
# It is true by default. | |||
@@ -411,19 +435,19 @@ class Trainer(object): | |||
self.metric_key = metric_key[1:] if metric_key[0] == "+" or metric_key[0] == "-" else metric_key | |||
elif len(metrics) > 0: | |||
self.metric_key = metrics[0].__class__.__name__.lower().strip('metric') | |||
# prepare loss | |||
losser = _prepare_losser(loss) | |||
# sampler check | |||
if sampler is not None and not isinstance(sampler, Sampler): | |||
raise ValueError("The type of sampler should be fastNLP.BaseSampler, got {}.".format(type(sampler))) | |||
if check_code_level > -1: | |||
_check_code(dataset=train_data, model=model, losser=losser, metrics=metrics, dev_data=dev_data, | |||
metric_key=metric_key, check_level=check_code_level, | |||
batch_size=min(batch_size, DEFAULT_CHECK_BATCH_SIZE)) | |||
self.train_data = train_data | |||
self.dev_data = dev_data # If None, No validation. | |||
self.model = model | |||
@@ -443,10 +467,9 @@ class Trainer(object): | |||
self.callback_manager = CallbackManager(env={"trainer": self}, callbacks=callbacks) | |||
self.n_steps = (len(self.train_data) // self.batch_size + int( | |||
len(self.train_data) % self.batch_size != 0)) * self.n_epochs | |||
# whether the model was DataParallel from the start.
self.model = _move_model_to_device(self.model, device=device) | |||
if isinstance(optimizer, torch.optim.Optimizer): | |||
self.optimizer = optimizer | |||
elif isinstance(optimizer, Optimizer): | |||
@@ -455,11 +478,11 @@ class Trainer(object): | |||
self.optimizer = torch.optim.Adam(model.parameters(), lr=4e-3) | |||
else: | |||
raise TypeError("optimizer can only be torch.optim.Optimizer type, not {}.".format(type(optimizer))) | |||
self.use_tqdm = use_tqdm | |||
self.pbar = None | |||
self.print_every = abs(self.print_every) | |||
if self.dev_data is not None: | |||
self.tester = Tester(model=self.model, | |||
data=self.dev_data, | |||
@@ -467,13 +490,13 @@ class Trainer(object): | |||
batch_size=self.batch_size, | |||
device=None,  # the device was already handled above
verbose=0) | |||
self.step = 0 | |||
self.start_time = None # start timestamp | |||
self.callback_manager = CallbackManager(env={"trainer": self}, | |||
callbacks=callbacks) | |||
def train(self, load_best_model=True): | |||
""" | |||
Call this method to make the Trainer start training.
@@ -502,14 +525,14 @@ class Trainer(object): | |||
self.start_time = str(datetime.now().strftime('%Y-%m-%d-%H-%M-%S')) | |||
start_time = time.time() | |||
print("training epochs started " + self.start_time, flush=True) | |||
try: | |||
self.callback_manager.on_train_begin() | |||
self._train() | |||
self.callback_manager.on_train_end() | |||
except (CallbackException, KeyboardInterrupt) as e: | |||
self.callback_manager.on_exception(e) | |||
if self.dev_data is not None and hasattr(self, 'best_dev_perf'): | |||
print( | |||
"\nIn Epoch:{}/Step:{}, got best dev performance:".format(self.best_dev_epoch, self.best_dev_step) + | |||
@@ -527,9 +550,9 @@ class Trainer(object): | |||
finally: | |||
pass | |||
results['seconds'] = round(time.time() - start_time, 2) | |||
return results | |||
def _train(self): | |||
if not self.use_tqdm: | |||
from fastNLP.core.utils import _pseudo_tqdm as inner_tqdm | |||
@@ -538,7 +561,7 @@ class Trainer(object): | |||
self.step = 0 | |||
self.epoch = 0 | |||
start = time.time() | |||
with inner_tqdm(total=self.n_steps, postfix='loss:{0:<6.5f}', leave=False, dynamic_ncols=True) as pbar: | |||
self.pbar = pbar if isinstance(pbar, tqdm) else None | |||
avg_loss = 0 | |||
@@ -557,21 +580,21 @@ class Trainer(object): | |||
# negative sampling; replace unknown; re-weight batch_y | |||
self.callback_manager.on_batch_begin(batch_x, batch_y, indices) | |||
prediction = self._data_forward(self.model, batch_x) | |||
# edit prediction | |||
self.callback_manager.on_loss_begin(batch_y, prediction) | |||
loss = self._compute_loss(prediction, batch_y).mean() | |||
avg_loss += loss.item() | |||
loss = loss / self.update_every | |||
# Is loss NaN or inf? requires_grad = False | |||
self.callback_manager.on_backward_begin(loss) | |||
self._grad_backward(loss) | |||
self.callback_manager.on_backward_end() | |||
self._update() | |||
self.callback_manager.on_step_end() | |||
if self.step % self.print_every == 0: | |||
avg_loss = float(avg_loss) / self.print_every | |||
if self.use_tqdm: | |||
@@ -585,7 +608,7 @@ class Trainer(object): | |||
pbar.set_postfix_str(print_output) | |||
avg_loss = 0 | |||
self.callback_manager.on_batch_end() | |||
if ((self.validate_every > 0 and self.step % self.validate_every == 0) or | |||
(self.validate_every < 0 and self.step % len(data_iterator) == 0)) \ | |||
and self.dev_data is not None: | |||
@@ -594,20 +617,20 @@ class Trainer(object): | |||
self.n_steps) + \ | |||
self.tester._format_eval_results(eval_res) | |||
pbar.write(eval_str + '\n') | |||
# ================= mini-batch end ==================== # | |||
# lr decay; early stopping | |||
self.callback_manager.on_epoch_end() | |||
# =============== epochs end =================== # | |||
pbar.close() | |||
self.pbar = None | |||
# ============ tqdm end ============== # | |||
def _do_validation(self, epoch, step): | |||
self.callback_manager.on_valid_begin() | |||
res = self.tester.test() | |||
is_better_eval = False | |||
if self._better_eval_result(res): | |||
if self.save_path is not None: | |||
@@ -622,7 +645,7 @@ class Trainer(object): | |||
# get validation results; adjust optimizer | |||
self.callback_manager.on_valid_end(res, self.metric_key, self.optimizer, is_better_eval) | |||
return res | |||
def _mode(self, model, is_test=False): | |||
"""Train mode or Test mode. This is for PyTorch currently. | |||
@@ -634,21 +657,22 @@ class Trainer(object): | |||
model.eval() | |||
else: | |||
model.train() | |||
def _update(self): | |||
"""Perform weight update on a model. | |||
""" | |||
if self.optimizer is not None and (self.step + 1) % self.update_every == 0: | |||
self.optimizer.step() | |||
def _data_forward(self, network, x): | |||
x = _build_args(network.forward, **x) | |||
y = network(**x) | |||
if not isinstance(y, dict): | |||
raise TypeError(f"The return value of {_get_func_signature(network.forward)} should be dict, got {type(y)}.") | |||
raise TypeError( | |||
f"The return value of {_get_func_signature(network.forward)} should be dict, got {type(y)}.") | |||
return y | |||
def _grad_backward(self, loss): | |||
"""Compute gradient with link rules. | |||
@@ -659,7 +683,7 @@ class Trainer(object): | |||
if self.step % self.update_every == 0: | |||
self.model.zero_grad() | |||
loss.backward() | |||
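Together, _grad_backward and _update implement gradient accumulation: gradients are zeroed only every
update_every steps, accumulate across the intervening backward passes, and are applied in a single optimizer
step. The same pattern in plain PyTorch (our sketch, with hypothetical loader/criterion/optimizer objects)::

    for step, (x, y) in enumerate(loader, start=1):
        loss = criterion(model(x), y) / update_every  # scale so the summed gradient matches one large batch
        loss.backward()                               # gradients accumulate in each parameter's .grad
        if step % update_every == 0:
            optimizer.step()
            optimizer.zero_grad()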
def _compute_loss(self, predict, truth): | |||
"""Compute loss given prediction and ground truth. | |||
@@ -668,7 +692,7 @@ class Trainer(object): | |||
:return: a scalar | |||
""" | |||
return self.losser(predict, truth) | |||
def _save_model(self, model, model_name, only_param=False): | |||
""" 存储不含有显卡信息的state_dict或model | |||
:param model: | |||
@@ -691,7 +715,7 @@ class Trainer(object): | |||
model.cpu() | |||
torch.save(model, model_path) | |||
model.to(self._model_device) | |||
def _load_model(self, model, model_name, only_param=False): | |||
# returns a bool indicating whether the model was reloaded successfully
if self.save_path is not None: | |||
@@ -709,7 +733,7 @@ class Trainer(object): | |||
else: | |||
return False | |||
return True | |||
def _better_eval_result(self, metrics): | |||
"""Check if the current epoch yields better validation results. | |||
@@ -760,7 +784,7 @@ def _check_code(dataset, model, losser, metrics, batch_size=DEFAULT_CHECK_BATCH_ | |||
check_level=0): | |||
# check the get_loss method
model_device = model.parameters().__next__().device
batch = Batch(dataset=dataset, batch_size=batch_size, sampler=SequentialSampler())
for batch_count, (batch_x, batch_y) in enumerate(batch):
_move_dict_value_to_device(batch_x, batch_y, device=model_device)
@@ -784,13 +808,13 @@ def _check_code(dataset, model, losser, metrics, batch_size=DEFAULT_CHECK_BATCH_ | |||
print(info_str) | |||
_check_forward_error(forward_func=model.forward, dataset=dataset, | |||
batch_x=batch_x, check_level=check_level) | |||
refined_batch_x = _build_args(model.forward, **batch_x) | |||
pred_dict = model(**refined_batch_x) | |||
func_signature = _get_func_signature(model.forward) | |||
if not isinstance(pred_dict, dict): | |||
raise TypeError(f"The return value of {func_signature} should be `dict`, not `{type(pred_dict)}`.") | |||
# loss check | |||
try: | |||
loss = losser(pred_dict, batch_y) | |||
@@ -814,7 +838,7 @@ def _check_code(dataset, model, losser, metrics, batch_size=DEFAULT_CHECK_BATCH_ | |||
model.zero_grad() | |||
if batch_count + 1 >= DEFAULT_CHECK_NUM_BATCH: | |||
break | |||
if dev_data is not None: | |||
tester = Tester(data=dev_data[:batch_size * DEFAULT_CHECK_NUM_BATCH], model=model, metrics=metrics, | |||
batch_size=batch_size, verbose=-1) | |||
@@ -828,7 +852,7 @@ def _check_eval_results(metrics, metric_key, metric_list): | |||
# metric_list: the metrics used for evaluation, coming from the Trainer's initialization
if isinstance(metrics, tuple): | |||
loss, metrics = metrics | |||
if isinstance(metrics, dict): | |||
if len(metrics) == 1: | |||
# only single metric, just use it | |||
@@ -839,7 +863,7 @@ def _check_eval_results(metrics, metric_key, metric_list): | |||
if metrics_name not in metrics: | |||
raise RuntimeError(f"{metrics_name} is chosen to do validation, but got {metrics}") | |||
metric_dict = metrics[metrics_name] | |||
if len(metric_dict) == 1: | |||
indicator_val, indicator = list(metric_dict.values())[0], list(metric_dict.keys())[0] | |||
elif len(metric_dict) > 1 and metric_key is None: | |||
@@ -1,3 +1,7 @@ | |||
""" | |||
The utils module implements many utilities needed both inside and outside fastNLP. The part meant for users is the :func:`cache_results` decorator.
""" | |||
__all__ = ["cache_results"] | |||
import _pickle | |||
import inspect | |||
import os | |||
@@ -29,6 +33,8 @@ def _prepare_cache_filepath(filepath): | |||
# TODO: save the parameters used when caching, and warn on load if they do not match.
def cache_results(_cache_fp, _refresh=False, _verbose=1): | |||
""" | |||
Alias: :class:`fastNLP.cache_results` :class:`fastNLP.core.utils.cache_results`
cache_results is fastNLP's decorator for caching data. The example below shows how to use it
Example:: | |||
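    # A reconstructed illustration -- the original example is elided in this diff.
    # `process_data` and its body are hypothetical; passing `_refresh` at call time
    # is assumed to be supported, as suggested by the decorator's signature.
    @cache_results('caches/data.pkl')
    def process_data(path):
        ds = expensive_preprocessing(path)
        return ds

    data = process_data('train.txt')                  # first call: computes and caches
    data = process_data('train.txt')                  # later calls: load the cached pickle
    data = process_data('train.txt', _refresh=True)   # force recomputation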
@@ -193,13 +199,14 @@ def _move_model_to_device(model, device): | |||
if isinstance(model, torch.nn.parallel.DistributedDataParallel): | |||
raise RuntimeError("model of `torch.nn.parallel.DistributedDataParallel` is not supported right now.") | |||
if not torch.cuda.is_available() and (device!='cpu' or (isinstance(device, torch.device) and device.type!='cpu')): | |||
raise ValueError("There is no usable gpu. set `device` as `cpu`.") | |||
if device is None: | |||
if isinstance(model, torch.nn.DataParallel): | |||
model.cuda() | |||
return model | |||
else: | |||
if not torch.cuda.is_available() and ( | |||
device != 'cpu' or (isinstance(device, torch.device) and device.type != 'cpu')): | |||
raise ValueError("There is no usable gpu. set `device` as `cpu`.") | |||
if isinstance(model, torch.nn.DataParallel): | |||
raise RuntimeError("When model is `torch.nn.DataParallel`, the device has to be `None`.") | |||
@@ -1,6 +1,6 @@ | |||
from functools import wraps | |||
from collections import Counter | |||
from fastNLP.core.dataset import DataSet | |||
from .dataset import DataSet | |||
def _check_build_vocab(func): | |||
"""A decorator to make sure the indexing is built before used. | |||
@@ -34,6 +34,8 @@ def _check_build_status(func): | |||
class Vocabulary(object): | |||
""" | |||
Alias: :class:`fastNLP.Vocabulary` :class:`fastNLP.core.vocabulary.Vocabulary`
Builds, stores, and applies a one-to-one mapping from `str` to `int`
Example:: | |||
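    # A reconstructed illustration -- the original example is elided in this diff;
    # word_list is a hypothetical list(str).
    vocab = Vocabulary()
    vocab.update(word_list)     # count the words
    vocab["word"]               # index of "word" (assumed __getitem__ lookup)
    vocab.to_word(5)            # word whose index is 5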
@@ -98,7 +100,7 @@ class Vocabulary(object): | |||
""" | |||
Increase the in-dictionary frequency count of each word in the sequence, in order
:param list(str) word_lst: a sequence of words
:param list[str] word_lst: a sequence of words
""" | |||
self.update(word_lst) | |||
@@ -185,12 +187,11 @@ class Vocabulary(object): | |||
# remember to use `field_name` | |||
vocab.index_dataset(train_data, dev_data, test_data, field_name='words') | |||
:param DataSet datasets: the DataSet(s) whose field should be converted to indices; one or several are supported
:param datasets: the :class:`~fastNLP.DataSet` (s) whose field should be converted to indices; one or several (list) are supported
:param str field_name: the field to convert to indices; if several DataSets are given, every DataSet must contain this field.
    Currently only ``str`` , ``list(str)`` and ``list(list(str))`` are supported
:param str new_field_name: the field_name to store the result under. If ``None`` , the original field is overwritten.
    Default: ``None``
:return self: | |||
""" | |||
def index_instance(ins): | |||
""" | |||
@@ -230,7 +231,7 @@ class Vocabulary(object): | |||
# remember to use `field_name` | |||
vocab.from_dataset(train_data1, train_data2, field_name='words') | |||
:param DataSet datasets: the DataSet(s) to build the vocabulary from; one or several are supported.
:param datasets: the :class:`~fastNLP.DataSet` (s) to build the vocabulary from; one or several (list) are supported
:param field_name: either ``str`` or ``list(str)`` .
    The field(s) used to build the vocabulary; one or several fields are supported.
    If several DataSets are given, every DataSet must contain these fields.
@@ -12,13 +12,14 @@ | |||
How to use these classes is documented under the corresponding modules.
""" | |||
from .embed_loader import EmbedLoader | |||
from .dataset_loader import * | |||
from .config_io import * | |||
from .model_io import * | |||
from .dataset_loader import DataSetLoader, CSVLoader, JsonLoader, ConllLoader, SNLILoader, SSTLoader, \ | |||
PeopleDailyCorpusLoader, Conll2003Loader | |||
from .config_io import ConfigLoader, ConfigSection, ConfigSaver | |||
from .model_io import ModelLoader as ModelLoader, ModelSaver as ModelSaver | |||
__all__ = [ | |||
'EmbedLoader', | |||
'DataSetLoader', | |||
'CSVLoader', | |||
'JsonLoader', | |||
@@ -27,11 +28,11 @@ __all__ = [ | |||
'SSTLoader', | |||
'PeopleDailyCorpusLoader', | |||
'Conll2003Loader', | |||
'ConfigLoader', | |||
'ConfigSection', | |||
'ConfigSaver', | |||
'ModelLoader', | |||
'ModelSaver', | |||
] | |||
@@ -3,7 +3,8 @@ import os | |||
class BaseLoader(object): | |||
"""Base loader for all loaders. | |||
""" | |||
The base class of all Loaders; it provides a reference for the API.
""" | |||
def __init__(self): | |||
@@ -11,7 +12,10 @@ class BaseLoader(object): | |||
@staticmethod | |||
def load_lines(data_path): | |||
"""按行读取,舍弃每行两侧空白字符,返回list of str | |||
""" | |||
按行读取,舍弃每行两侧空白字符,返回list of str | |||
:param data_path: 读取数据的路径 | |||
""" | |||
with open(data_path, "r", encoding="utf=8") as f: | |||
text = f.readlines() | |||
@@ -19,7 +23,10 @@ class BaseLoader(object): | |||
@classmethod | |||
def load(cls, data_path): | |||
"""先按行读取,去除一行两侧空白,再提取每行的字符。返回list of list of str | |||
""" | |||
先按行读取,去除一行两侧空白,再提取每行的字符。返回list of list of str | |||
:param data_path: | |||
""" | |||
with open(data_path, "r", encoding="utf-8") as f: | |||
text = f.readlines() | |||
@@ -40,9 +47,7 @@ class BaseLoader(object): | |||
class DataLoaderRegister: | |||
"""Register for all data sets. | |||
""" | |||
# TODO: where is this class used?
_readers = {} | |||
@classmethod | |||
@@ -1,19 +1,22 @@ | |||
""" | |||
.. _config-io: | |||
Reads, processes, and saves config files
""" | |||
__all__ = ["ConfigLoader","ConfigSection","ConfigSaver"] | |||
import configparser | |||
import json | |||
import os | |||
from fastNLP.io.base_loader import BaseLoader | |||
from .base_loader import BaseLoader | |||
class ConfigLoader(BaseLoader): | |||
"""Loader for configuration. | |||
""" | |||
Alias: :class:`fastNLP.io.ConfigLoader` :class:`fastNLP.io.config_io.ConfigLoader`
A Loader that reads configuration files
:param str data_path: path to the config
:param str data_path: path to the config file
""" | |||
def __init__(self, data_path=None): | |||
@@ -27,14 +30,16 @@ class ConfigLoader(BaseLoader): | |||
@staticmethod | |||
def load_config(file_path, sections): | |||
"""Load section(s) of configuration into the ``sections`` provided. No returns. | |||
""" | |||
Load section(s) of a config file into the provided ``sections`` ; no return value
:param str file_path: the path of config file | |||
:param dict sections: the dict of ``{section_name(string): ConfigSection object}`` | |||
Example:: | |||
test_args = ConfigSection() | |||
ConfigLoader("config.cfg").load_config("./data_for_tests/config", {"POS_test": test_args}) | |||
:param str file_path: path to the config file
:param dict sections: a dict made of `section_name(string)` : :class:`~fastNLP.io.ConfigSection` key-value pairs
Example:: | |||
test_args = ConfigSection() | |||
ConfigLoader("config.cfg").load_config("./data_for_tests/config", {"POS_test": test_args}) | |||
""" | |||
assert isinstance(sections, dict) | |||
@@ -70,7 +75,10 @@ class ConfigLoader(BaseLoader): | |||
class ConfigSection(object): | |||
"""ConfigSection is the data structure storing all key-value pairs in one section in a config file. | |||
""" | |||
Alias: :class:`fastNLP.io.ConfigSection` :class:`fastNLP.io.config_io.ConfigSection`
ConfigSection is a data structure holding all key-value pairs of one section; instances of it are the recommended companion to :meth:`ConfigLoader.load_config`
""" | |||
@@ -146,9 +154,12 @@ class ConfigSection(object): | |||
class ConfigSaver(object): | |||
"""ConfigSaver is used to save config file and solve related conflicts. | |||
""" | |||
Alias: :class:`fastNLP.io.ConfigSaver` :class:`fastNLP.io.config_io.ConfigSaver`
ConfigSaver is the class that saves config files and resolves the related conflicts
:param str file_path: path to the config file
""" | |||
def __init__(self, file_path): | |||
@@ -157,7 +168,8 @@ class ConfigSaver(object): | |||
raise FileNotFoundError("file {} NOT found!".__format__(self.file_path)) | |||
def _get_section(self, sect_name): | |||
"""This is the function to get the section with the section name. | |||
""" | |||
This is the function to get the section with the section name. | |||
:param sect_name: the name of the section to load.
:return: The section. | |||
@@ -167,7 +179,8 @@ class ConfigSaver(object): | |||
return sect | |||
def _read_section(self): | |||
"""This is the function to read sections from the config file. | |||
""" | |||
This is the function to read sections from the config file. | |||
:return: sect_list, sect_key_list | |||
sect_list: A list of ConfigSection(). | |||
@@ -219,7 +232,8 @@ class ConfigSaver(object): | |||
return sect_list, sect_key_list | |||
def _write_section(self, sect_list, sect_key_list): | |||
"""This is the function to write config file with section list and name list. | |||
""" | |||
This is the function to write config file with section list and name list. | |||
:param sect_list: a list of ConfigSection() that need to be written into the file.
:param sect_key_list: a list of the names of the sections in sect_list.
@@ -240,10 +254,11 @@ class ConfigSaver(object): | |||
f.write('\n') | |||
def save_config_file(self, section_name, section): | |||
"""This is the function to be called to change the config file with a single section and its name. | |||
""" | |||
Call this method to modify and save a single section of the config file
:param str section_name: The name of section what needs to be changed and saved. | |||
:param ConfigSection section: The section with key and value what needs to be changed and saved. | |||
:param str section_name: the name of the section to save.
:param section: the section to modify and save, of type :class:`~fastNLP.io.ConfigSection`
""" | |||
section_file = self._get_section(section_name) | |||
if len(section_file.__dict__.keys()) == 0: # the section not in the file before | |||
@@ -1,8 +1,6 @@ | |||
""" | |||
.. _dataset-loader: | |||
The DataSetLoader API, for reading data in different formats and returning a `DataSet` ;
the resulting `DataSet` object can be passed directly to `Trainer` and `Tester` for model training and testing
The dataset_loader module implements many DataSetLoaders for reading data in different formats and returning a `DataSet` ;
the resulting :class:`~fastNLP.DataSet` object can be passed directly to :class:`~fastNLP.Trainer` and :class:`~fastNLP.Tester` for model training and testing
Example:: | |||
@@ -13,50 +11,50 @@ Example:: | |||
# ... do stuff | |||
""" | |||
import os | |||
import json | |||
from nltk.tree import Tree | |||
from fastNLP.core.dataset import DataSet | |||
from fastNLP.core.instance import Instance | |||
from fastNLP.io.file_reader import _read_csv, _read_json, _read_conll | |||
from ..core.dataset import DataSet | |||
from ..core.instance import Instance | |||
from .file_reader import _read_csv, _read_json, _read_conll | |||
def _download_from_url(url, path):
"""Download a file from `url` into `path`."""
from tqdm import tqdm
import requests
r = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}, stream=True) | |||
chunk_size = 16 * 1024 | |||
total_size = int(r.headers.get('Content-length', 0)) | |||
with open(path, "wb") as file ,\ | |||
tqdm(total=total_size, unit='B', unit_scale=1, desc=path.split('/')[-1]) as t: | |||
with open(path, "wb") as file, \ | |||
tqdm(total=total_size, unit='B', unit_scale=1, desc=path.split('/')[-1]) as t: | |||
for chunk in r.iter_content(chunk_size): | |||
if chunk: | |||
file.write(chunk) | |||
t.update(len(chunk)) | |||
return | |||
def _uncompress(src, dst): | |||
import zipfile, gzip, tarfile, os | |||
def unzip(src, dst): | |||
with zipfile.ZipFile(src, 'r') as f: | |||
f.extractall(dst) | |||
def ungz(src, dst): | |||
with gzip.open(src, 'rb') as f, open(dst, 'wb') as uf: | |||
length = 16 * 1024 # 16KB | |||
length = 16 * 1024 # 16KB | |||
buf = f.read(length) | |||
while buf: | |||
uf.write(buf) | |||
buf = f.read(length) | |||
def untar(src, dst): | |||
with tarfile.open(src, 'r:gz') as f: | |||
f.extractall(dst) | |||
fn, ext = os.path.splitext(src) | |||
_, ext_2 = os.path.splitext(fn) | |||
if ext == '.zip': | |||
@@ -71,42 +69,48 @@ def _uncompress(src, dst): | |||
class DataSetLoader: | |||
""" | |||
Alias: :class:`fastNLP.io.DataSetLoader` :class:`fastNLP.io.dataset_loader.DataSetLoader`
The interface shared by all `DataSetLoader` s
The API of all DataSetLoaders; you can subclass it to implement your own DataSetLoader (see the sketch after this class)
""" | |||
def load(self, path): | |||
"""从指定 ``path`` 的文件中读取数据,返回DataSet | |||
:param str path: file path | |||
:return: a DataSet object | |||
:param str path: 文件路径 | |||
:return: 一个 :class:`~fastNLP.DataSet` 类型的对象 | |||
""" | |||
raise NotImplementedError | |||
def convert(self, data): | |||
"""用Python数据对象创建DataSet | |||
""" | |||
用Python数据对象创建DataSet,各个子类需要自行实现这个方法 | |||
:param data: inner data structure (user-defined) to represent the data. | |||
:return: a DataSet object | |||
:param data: Python 内置的数据结构 | |||
:return: 一个 :class:`~fastNLP.DataSet` 类型的对象 | |||
""" | |||
raise NotImplementedError | |||
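A minimal subclass sketch (ours; the tab-separated format and field names are hypothetical) showing how
``load`` and ``convert`` are meant to cooperate::

    class TSVPairLoader(DataSetLoader):
        def load(self, path):
            # read 'text<TAB>label' lines and hand the raw pairs to convert()
            with open(path, 'r', encoding='utf-8') as f:
                pairs = [line.rstrip('\n').split('\t') for line in f if line.strip()]
            return self.convert(pairs)

        def convert(self, data):
            ds = DataSet()
            for text, label in data:
                ds.append(Instance(words=text.split(), target=label))
            return ds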
class PeopleDailyCorpusLoader(DataSetLoader): | |||
"""读取人民日报数据集 | |||
""" | |||
别名::class:`fastNLP.io.PeopleDailyCorpusLoader` :class:`fastNLP.io.dataset_loader.PeopleDailyCorpusLoader` | |||
读取人民日报数据集 | |||
""" | |||
def __init__(self): | |||
super(PeopleDailyCorpusLoader, self).__init__() | |||
self.pos = True | |||
self.ner = True | |||
def load(self, data_path, pos=True, ner=True): | |||
""" | |||
:param str data_path: path to the data
:param bool pos: whether to use part-of-speech tags
:param bool ner: whether to use named-entity tags
:return: a DataSet object
:return: an object of type :class:`~fastNLP.DataSet`
""" | |||
self.pos, self.ner = pos, ner | |||
with open(data_path, "r", encoding="utf-8") as f: | |||
@@ -152,8 +156,13 @@ class PeopleDailyCorpusLoader(DataSetLoader): | |||
example.append(sent_ner) | |||
examples.append(example) | |||
return self.convert(examples) | |||
def convert(self, data): | |||
""" | |||
:param data: built-in Python objects
:return: an object of type :class:`~fastNLP.DataSet`
""" | |||
data_set = DataSet() | |||
for item in data: | |||
sent_words = item[0] | |||
@@ -172,6 +181,8 @@ class PeopleDailyCorpusLoader(DataSetLoader): | |||
class ConllLoader(DataSetLoader): | |||
""" | |||
Alias: :class:`fastNLP.io.ConllLoader` :class:`fastNLP.io.dataset_loader.ConllLoader`
Reads data in CoNLL format; see http://conll.cemantix.org/2012/data.html for details of the format
Columns are numbered from 0, with the following contents::
@@ -193,9 +204,10 @@ class ConllLoader(DataSetLoader): | |||
:param headers: the name of each data column; must be a list or tuple of str. ``header`` corresponds one-to-one with ``indexs``
:param indexs: the indices of the data columns to keep, starting from 0. If ``None`` , all columns are kept. Default: ``None``
:param dropna: whether to skip malformed data; if ``False`` , a ``ValueError`` is raised on malformed data. Default: ``True``
:param dropna: whether to skip malformed data; if ``False`` , a ``ValueError`` is raised on malformed data. Default: ``False``
""" | |||
def __init__(self, headers, indexs=None, dropna=True): | |||
def __init__(self, headers, indexs=None, dropna=False): | |||
super(ConllLoader, self).__init__() | |||
if not isinstance(headers, (list, tuple)): | |||
raise TypeError('invalid headers: {}, should be list of strings'.format(headers)) | |||
@@ -207,21 +219,25 @@ class ConllLoader(DataSetLoader): | |||
if len(indexs) != len(headers): | |||
raise ValueError | |||
self.indexs = indexs | |||
def load(self, path): | |||
ds = DataSet() | |||
for idx, data in _read_conll(path, indexes=self.indexs, dropna=self.dropna): | |||
ins = {h:data[i] for i, h in enumerate(self.headers)} | |||
ins = {h: data[i] for i, h in enumerate(self.headers)} | |||
ds.append(Instance(**ins)) | |||
return ds | |||
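A usage sketch (ours; the column layout of the hypothetical file is assumed), keeping only the word and POS columns::

    loader = ConllLoader(headers=['words', 'pos'], indexs=[0, 3])
    ds = loader.load('train.conll')   # DataSet with fields 'words' and 'pos'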
class Conll2003Loader(ConllLoader): | |||
"""读取Conll2003数据 | |||
""" | |||
别名::class:`fastNLP.io.Conll2003Loader` :class:`fastNLP.io.dataset_loader.Conll2003Loader` | |||
读取Conll2003数据 | |||
关于数据集的更多信息,参考: | |||
https://sites.google.com/site/ermasoftware/getting-started/ne-tagging-conll2003-data | |||
""" | |||
def __init__(self): | |||
headers = [ | |||
'tokens', 'pos', 'chunks', 'ner', | |||
@@ -260,7 +276,10 @@ def _cut_long_sentence(sent, max_sample_length=200): | |||
class SSTLoader(DataSetLoader): | |||
"""读取SST数据集, DataSet包含fields:: | |||
""" | |||
别名::class:`fastNLP.io.SSTLoader` :class:`fastNLP.io.dataset_loader.SSTLoader` | |||
读取SST数据集, DataSet包含fields:: | |||
words: list(str) 需要分类的文本 | |||
target: str 文本的标签 | |||
@@ -270,21 +289,22 @@ class SSTLoader(DataSetLoader): | |||
:param subtree: whether to expand the data into subtrees to enlarge the dataset. Default: ``False``
:param fine_grained: whether to use the SST-5 scheme; if ``False`` , SST-2 is used. Default: ``False``
""" | |||
def __init__(self, subtree=False, fine_grained=False): | |||
self.subtree = subtree | |||
tag_v = {'0':'very negative', '1':'negative', '2':'neutral', | |||
'3':'positive', '4':'very positive'} | |||
tag_v = {'0': 'very negative', '1': 'negative', '2': 'neutral', | |||
'3': 'positive', '4': 'very positive'} | |||
if not fine_grained: | |||
tag_v['0'] = tag_v['1'] | |||
tag_v['4'] = tag_v['3'] | |||
self.tag_v = tag_v | |||
def load(self, path): | |||
""" | |||
:param path: str, path where the data is stored
:return: DataSet.
:param str path: path where the data is stored
:return: an object of type :class:`~fastNLP.DataSet`
""" | |||
datalist = [] | |||
with open(path, 'r', encoding='utf-8') as f: | |||
@@ -296,7 +316,7 @@ class SSTLoader(DataSetLoader): | |||
for words, tag in datas: | |||
ds.append(Instance(words=words, target=tag)) | |||
return ds | |||
@staticmethod | |||
def _get_one(data, subtree): | |||
tree = Tree.fromstring(data) | |||
@@ -307,15 +327,18 @@ class SSTLoader(DataSetLoader): | |||
class JsonLoader(DataSetLoader): | |||
""" | |||
Alias: :class:`fastNLP.io.JsonLoader` :class:`fastNLP.io.dataset_loader.JsonLoader`
Reads data in JSON format. The data must be stored line by line, each line being one JSON object with assorted attributes
:param dict fields: the names of the JSON attributes to read, mapped to the field_name stored in the DataSet.
    Each `key` of ``fields`` must be an attribute name of the JSON objects. Each `value` is the `field_name` the
    attribute is stored under in the DataSet; a `value` may also be ``None`` , in which case the stored `field_name`
    equals the attribute name of the JSON object.
    ``fields`` itself may be ``None`` , in which case every attribute of the JSON objects is kept in the DataSet. Default: ``None``
:param bool dropna: whether to skip malformed data; if ``True`` it is skipped, if ``False`` a ``ValueError`` is raised on malformed data.
    Default: ``True``
    Default: ``False``
""" | |||
def __init__(self, fields=None, dropna=False): | |||
super(JsonLoader, self).__init__() | |||
self.dropna = dropna | |||
@@ -326,17 +349,22 @@ class JsonLoader(DataSetLoader): | |||
for k, v in fields.items(): | |||
self.fields[k] = k if v is None else v | |||
self.fields_list = list(self.fields.keys()) | |||
def load(self, path): | |||
ds = DataSet() | |||
for idx, d in _read_json(path, fields=self.fields_list, dropna=self.dropna): | |||
ins = {self.fields[k]:v for k,v in d.items()} | |||
if self.fields: | |||
ins = {self.fields[k]: v for k, v in d.items()} | |||
else: | |||
ins = d | |||
ds.append(Instance(**ins)) | |||
return ds | |||
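A usage sketch (ours; the attribute names of the hypothetical .jsonl file are assumed) showing the ``fields`` mapping::

    loader = JsonLoader(fields={'text': 'words', 'label': None}, dropna=True)
    ds = loader.load('data.jsonl')   # 'text' becomes field 'words'; 'label' keeps its name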
class SNLILoader(JsonLoader): | |||
""" | |||
Alias: :class:`fastNLP.io.SNLILoader` :class:`fastNLP.io.dataset_loader.SNLILoader`
Reads the SNLI dataset; the resulting DataSet contains the fields::
words1: list(str), the first sentence (the premise)
@@ -345,6 +373,7 @@ class SNLILoader(JsonLoader): | |||
Data source: https://nlp.stanford.edu/projects/snli/snli_1.0.zip
""" | |||
def __init__(self): | |||
fields = { | |||
'sentence1_parse': 'words1', | |||
@@ -352,12 +381,14 @@ class SNLILoader(JsonLoader): | |||
'gold_label': 'target', | |||
} | |||
super(SNLILoader, self).__init__(fields=fields) | |||
def load(self, path): | |||
ds = super(SNLILoader, self).load(path) | |||
def parse_tree(x): | |||
t = Tree.fromstring(x) | |||
return t.leaves() | |||
ds.apply(lambda ins: parse_tree(ins['words1']), new_field_name='words1') | |||
ds.apply(lambda ins: parse_tree(ins['words2']), new_field_name='words2') | |||
ds.drop(lambda x: x['target'] == '-') | |||
@@ -366,19 +397,22 @@ class SNLILoader(JsonLoader): | |||
class CSVLoader(DataSetLoader): | |||
""" | |||
Alias: :class:`fastNLP.io.CSVLoader` :class:`fastNLP.io.dataset_loader.CSVLoader`
Reads a CSV-format dataset and returns a ``DataSet``
:param List[str] headers: the CSV header, defining the name of each column, i.e. the name of each `field` in the returned DataSet.
    If ``None`` , the first line of the file is treated as the ``headers`` . Default: ``None``
:param str sep: the separator between columns in the CSV file. Default: ","
:param bool dropna: whether to skip malformed data; if ``True`` it is skipped, if ``False`` a ``ValueError`` is raised on malformed data.
    Default: ``True``
    Default: ``False``
""" | |||
def __init__(self, headers=None, sep=",", dropna=True): | |||
def __init__(self, headers=None, sep=",", dropna=False): | |||
self.headers = headers | |||
self.sep = sep | |||
self.dropna = dropna | |||
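A usage sketch (ours; the file and headers are hypothetical)::

    loader = CSVLoader(headers=['raw_words', 'target'], sep='\t')
    ds = loader.load('train.tsv')    # every line is data, since headers were given explicitly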
def load(self, path): | |||
ds = DataSet() | |||
for idx, data in _read_csv(path, headers=self.headers, | |||
@@ -393,7 +427,7 @@ def _add_seg_tag(data): | |||
:param data: list of ([word], [pos], [heads], [head_tags]) | |||
:return: list of ([word], [pos]) | |||
""" | |||
_processed = [] | |||
for word_list, pos_list, _, _ in data: | |||
new_sample = [] | |||
@@ -407,4 +441,3 @@ def _add_seg_tag(data): | |||
new_sample.append((word[-1], 'E-' + pos)) | |||
_processed.append(list(map(list, zip(*new_sample)))) | |||
return _processed | |||
@@ -7,13 +7,17 @@ import os | |||
import numpy as np | |||
from fastNLP.core.vocabulary import Vocabulary | |||
from fastNLP.io.base_loader import BaseLoader | |||
from ..core.vocabulary import Vocabulary | |||
from .base_loader import BaseLoader | |||
import warnings | |||
class EmbedLoader(BaseLoader): | |||
"""这个类用于从预训练的Embedding中load数据。""" | |||
""" | |||
别名::class:`fastNLP.io.EmbedLoader` :class:`fastNLP.io.embed_loader.EmbedLoader` | |||
这个类用于从预训练的Embedding中load数据。 | |||
""" | |||
def __init__(self): | |||
super(EmbedLoader, self).__init__() | |||
@@ -25,13 +29,13 @@ class EmbedLoader(BaseLoader): | |||
whether the data is in word2vec format (whose first line holds only two elements) or GloVe format.
:param str embed_filepath: path to the pretrained embedding.
:param Vocabulary vocab: the vocabulary; embeddings are read for the words that appear in vocab. Embeddings of words not found
    are sampled from a normal distribution fitted to the found embeddings, so that the whole embedding matrix is identically distributed.
:param vocab: the vocabulary, of type :class:`~fastNLP.Vocabulary` ; embeddings are read for the words that appear in vocab.
    Embeddings of words not found are sampled from a normal distribution fitted to the found embeddings, so that the whole embedding matrix is identically distributed.
:param dtype: the dtype of the loaded embedding
:param bool normalize: whether to normalize each vector to unit norm
:param str error: 'ignore', 'strict'; with 'ignore' errors are skipped automatically, with 'strict' they are raised. The likely
    error sources are blank lines in the vocabulary file or inconsistent dimensions.
:return: numpy.ndarray of shape [len(vocab), dimension]; dimension is determined by the pretrained embedding.
:param str error: `ignore` , `strict` ; with `ignore` errors are skipped automatically, with `strict` they are raised.
    The likely error sources are blank lines in the vocabulary file or inconsistent dimensions.
:return numpy.ndarray: shape [len(vocab), dimension]; dimension is determined by the pretrained embedding.
""" | |||
assert isinstance(vocab, Vocabulary), "Only fastNLP.Vocabulary is supported." | |||
if not os.path.exists(embed_filepath): | |||
@@ -87,11 +91,11 @@ class EmbedLoader(BaseLoader): | |||
:param str padding: the padding tag for vocabulary. | |||
:param str unknown: the unknown tag for vocabulary. | |||
:param bool normalize: whether to normalize each vector to unit norm
:param str error: 'ignore', 'strict'; with 'ignore' errors are skipped automatically, with 'strict' they are raised. The likely
:param str error: `ignore` , `strict` ; with `ignore` errors are skipped automatically, with `strict` they are raised. The likely
    error sources are blank lines in the vocabulary file or inconsistent dimensions.
:return: numpy.ndarray of shape [len(vocab), dimension]; dimension is determined by the pretrained embedding.
:return: numpy.ndarray; the Vocabulary embedding has shape [vocab size + x, embedding dim], where "vocab size + x" reflects that
    the final size also depends on whether padding is used and whether unknown found a match in the vocabulary. Word order in the Vocabulary corresponds one-to-one to the embedding rows.
:return numpy.ndarray: shape [len(vocab), dimension]; dimension is determined by the pretrained embedding.
:return numpy.ndarray: the Vocabulary embedding has shape [vocab size + x, embedding dim], where "vocab size + x" reflects that
    the final size also depends on whether padding is used and whether unknown found a match in the vocabulary. Word order in the Vocabulary corresponds one-to-one to the embedding rows.
""" | |||
vocab = Vocabulary(padding=padding, unknown=unknown) | |||
vec_dict = {} | |||
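A usage sketch (ours; ``load_with_vocab`` is the assumed name of the vocab-taking method documented above, and the
GloVe path is hypothetical)::

    vocab = Vocabulary()
    vocab.update(['the', 'cat', 'sat'])
    matrix = EmbedLoader.load_with_vocab('glove.6B.100d.txt', vocab=vocab,
                                         normalize=False, error='ignore')
    # matrix.shape == (len(vocab), 100)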
@@ -1,3 +1,6 @@ | |||
""" | |||
This module provides file-reading helpers to the other modules; it exposes no user-facing API
""" | |||
import json | |||
@@ -1,16 +1,16 @@ | |||
""" | |||
.. _model-io: | |||
Loads and saves models
""" | |||
import torch | |||
from fastNLP.io.base_loader import BaseLoader | |||
from .base_loader import BaseLoader | |||
class ModelLoader(BaseLoader): | |||
""" | |||
Loader for models. | |||
Alias: :class:`fastNLP.io.ModelLoader` :class:`fastNLP.io.model_io.ModelLoader`
Loads models
""" | |||
def __init__(self): | |||
@@ -18,24 +18,30 @@ class ModelLoader(BaseLoader): | |||
@staticmethod | |||
def load_pytorch(empty_model, model_path): | |||
"""Load model parameters from ".pkl" files into the empty PyTorch model. | |||
""" | |||
从 ".pkl" 文件读取 PyTorch 模型 | |||
:param empty_model: a PyTorch model with initialized parameters. | |||
:param str model_path: the path to the saved model. | |||
:param empty_model: a PyTorch model whose parameters have been initialized
:param str model_path: the path the model was saved to
""" | |||
empty_model.load_state_dict(torch.load(model_path)) | |||
@staticmethod | |||
def load_pytorch_model(model_path): | |||
"""Load the entire model. | |||
""" | |||
Load an entire model
:param str model_path: the path to the saved model.
:param str model_path: the path the model was saved to
""" | |||
return torch.load(model_path) | |||
class ModelSaver(object): | |||
"""Save a model | |||
""" | |||
Alias: :class:`fastNLP.io.ModelSaver` :class:`fastNLP.io.model_io.ModelSaver`
Saves models
Example:: | |||
saver = ModelSaver("./save/model_ckpt_100.pkl") | |||
@@ -46,15 +52,16 @@ class ModelSaver(object): | |||
def __init__(self, save_path): | |||
""" | |||
:param save_path: the path to the saving directory. | |||
:param save_path: the path to save the model to
""" | |||
self.save_path = save_path | |||
def save_pytorch(self, model, param_only=True): | |||
"""Save a pytorch model into ".pkl" file. | |||
""" | |||
Save a PyTorch model into a ".pkl" file
:param model: a PyTorch model | |||
:param bool param_only: whether only to save the model parameters or the entire model. | |||
:param model: a PyTorch model
:param bool param_only: whether to save only the model's parameters (otherwise the entire model is saved)
""" | |||
if param_only is True: | |||
@@ -1,6 +1,6 @@ | |||
import torch | |||
from fastNLP.modules.decoder.MLP import MLP | |||
from ..modules.decoder.MLP import MLP | |||
class BaseModel(torch.nn.Module): | |||
@@ -6,7 +6,7 @@ import torch | |||
from torch import nn | |||
from .base_model import BaseModel | |||
from fastNLP.modules.encoder import BertModel | |||
from ..modules.encoder import BertModel | |||
class BertForSequenceClassification(BaseModel): | |||
@@ -7,16 +7,17 @@ import torch | |||
from torch import nn | |||
from torch.nn import functional as F | |||
from fastNLP.core.losses import LossFunc | |||
from fastNLP.core.metrics import MetricBase | |||
from fastNLP.core.utils import seq_lens_to_masks | |||
from fastNLP.models.base_model import BaseModel | |||
from fastNLP.modules.dropout import TimestepDropout | |||
from fastNLP.modules.encoder.transformer import TransformerEncoder | |||
from fastNLP.modules.encoder.variational_rnn import VarLSTM | |||
from fastNLP.modules.utils import initial_parameter | |||
from fastNLP.modules.utils import seq_mask | |||
from fastNLP.modules.utils import get_embeddings | |||
from ..core.const import Const as C | |||
from ..core.losses import LossFunc | |||
from ..core.metrics import MetricBase | |||
from ..core.utils import seq_lens_to_masks | |||
from ..modules.dropout import TimestepDropout | |||
from ..modules.encoder.transformer import TransformerEncoder | |||
from ..modules.encoder.variational_rnn import VarLSTM | |||
from ..modules.utils import initial_parameter | |||
from ..modules.utils import seq_mask | |||
from ..modules.utils import get_embeddings | |||
from .base_model import BaseModel | |||
def _mst(scores): | |||
""" | |||
@@ -325,21 +326,20 @@ class BiaffineParser(GraphParser): | |||
for p in m.parameters(): | |||
nn.init.normal_(p, 0, 0.1) | |||
def forward(self, words1, words2, seq_len, gold_heads=None): | |||
def forward(self, words1, words2, seq_len, target1=None): | |||
"""模型forward阶段 | |||
:param words1: [batch_size, seq_len] 输入word序列 | |||
:param words2: [batch_size, seq_len] 输入pos序列 | |||
:param seq_len: [batch_size, seq_len] 输入序列长度 | |||
:param gold_heads: [batch_size, seq_len] 输入真实标注的heads, 仅在训练阶段有效, | |||
:param target1: [batch_size, seq_len] 输入真实标注的heads, 仅在训练阶段有效, | |||
用于训练label分类器. 若为 ``None`` , 使用预测的heads输入到label分类器 | |||
Default: ``None`` | |||
:return dict: parsing results::
    arc_pred: [batch_size, seq_len, seq_len] arc prediction logits
    label_pred: [batch_size, seq_len, num_label] label prediction logits
    mask: [batch_size, seq_len] mask of the predictions
    head_pred: [batch_size, seq_len] the predicted heads, produced when ``gold_heads=None``
    pred1: [batch_size, seq_len, seq_len] arc prediction logits
    pred2: [batch_size, seq_len, num_label] label prediction logits
    pred3: [batch_size, seq_len] the predicted heads, produced when ``target1=None``
""" | |||
# prepare embeddings | |||
batch_size, length = words1.shape | |||
@@ -365,7 +365,7 @@ class BiaffineParser(GraphParser): | |||
_, unsort_idx = torch.sort(sort_idx, dim=0, descending=False) | |||
feat = feat[unsort_idx] | |||
else: | |||
seq_range = torch.arange(seq_len, dtype=torch.long, device=x.device)[None,:] | |||
seq_range = torch.arange(length, dtype=torch.long, device=x.device)[None,:] | |||
x = x + self.position_emb(seq_range) | |||
feat = self.encoder(x, mask.float()) | |||
@@ -380,7 +380,7 @@ class BiaffineParser(GraphParser): | |||
arc_pred = self.arc_predictor(arc_head, arc_dep) # [N, L, L] | |||
# use gold or predicted arc to predict label | |||
if gold_heads is None or not self.training: | |||
if target1 is None or not self.training: | |||
# use greedy decoding in training | |||
if self.training or self.use_greedy_infer: | |||
heads = self.greedy_decoder(arc_pred, mask) | |||
@@ -389,44 +389,45 @@ class BiaffineParser(GraphParser): | |||
head_pred = heads | |||
else: | |||
assert self.training # must be training mode | |||
if gold_heads is None: | |||
if target1 is None: | |||
heads = self.greedy_decoder(arc_pred, mask) | |||
head_pred = heads | |||
else: | |||
head_pred = None | |||
heads = gold_heads | |||
heads = target1 | |||
batch_range = torch.arange(start=0, end=batch_size, dtype=torch.long, device=words1.device).unsqueeze(1) | |||
label_head = label_head[batch_range, heads].contiguous() | |||
label_pred = self.label_predictor(label_head, label_dep) # [N, L, num_label] | |||
res_dict = {'arc_pred': arc_pred, 'label_pred': label_pred, 'mask': mask} | |||
res_dict = {C.OUTPUTS(0): arc_pred, C.OUTPUTS(1): label_pred} | |||
if head_pred is not None: | |||
res_dict['head_pred'] = head_pred | |||
res_dict[C.OUTPUTS(2)] = head_pred | |||
return res_dict | |||
@staticmethod | |||
def loss(arc_pred, label_pred, arc_true, label_true, mask): | |||
def loss(pred1, pred2, target1, target2, seq_len): | |||
""" | |||
Compute loss.
:param arc_pred: [batch_size, seq_len, seq_len] arc prediction logits
:param label_pred: [batch_size, seq_len, num_label] label prediction logits
:param arc_true: [batch_size, seq_len] gold arc annotations
:param label_true: [batch_size, seq_len] gold label annotations
:param mask: [batch_size, seq_len] mask of the predictions
:return: loss value
Computes the parser loss
:param pred1: [batch_size, seq_len, seq_len] arc prediction logits
:param pred2: [batch_size, seq_len, num_label] label prediction logits
:param target1: [batch_size, seq_len] gold arc annotations
:param target2: [batch_size, seq_len] gold label annotations
:param seq_len: [batch_size, seq_len] the true target lengths
:return loss: scalar
""" | |||
batch_size, seq_len, _ = arc_pred.shape | |||
batch_size, length, _ = pred1.shape | |||
mask = seq_mask(seq_len, length) | |||
flip_mask = (mask == 0) | |||
_arc_pred = arc_pred.clone() | |||
_arc_pred = pred1.clone() | |||
_arc_pred.masked_fill_(flip_mask.unsqueeze(1), -float('inf')) | |||
arc_logits = F.log_softmax(_arc_pred, dim=2) | |||
label_logits = F.log_softmax(label_pred, dim=2) | |||
label_logits = F.log_softmax(pred2, dim=2) | |||
batch_index = torch.arange(batch_size, device=arc_logits.device, dtype=torch.long).unsqueeze(1) | |||
child_index = torch.arange(seq_len, device=arc_logits.device, dtype=torch.long).unsqueeze(0) | |||
arc_loss = arc_logits[batch_index, child_index, arc_true] | |||
label_loss = label_logits[batch_index, child_index, label_true] | |||
child_index = torch.arange(length, device=arc_logits.device, dtype=torch.long).unsqueeze(0) | |||
arc_loss = arc_logits[batch_index, child_index, target1] | |||
label_loss = label_logits[batch_index, child_index, target2] | |||
byte_mask = flip_mask.byte() | |||
arc_loss.masked_fill_(byte_mask, 0) | |||
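Here ``seq_mask(seq_len, length)`` expands per-sentence lengths into a [batch_size, length] mask; a behaviorally
equivalent sketch (ours, assuming those semantics)::

    def seq_mask_sketch(seq_len, max_len):
        # seq_len: LongTensor [batch]; returns [batch, max_len] with 1 where position < length
        idx = torch.arange(max_len, device=seq_len.device).unsqueeze(0)
        return (idx < seq_len.unsqueeze(1)).long()

    # seq_len = torch.tensor([3, 1]), max_len = 4  ->  [[1, 1, 1, 0],
    #                                                   [1, 0, 0, 0]]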
@@ -441,21 +442,16 @@ class BiaffineParser(GraphParser): | |||
:param words1: [batch_size, seq_len] input word sequence
:param words2: [batch_size, seq_len] input POS sequence
:param seq_len: [batch_size, seq_len] input sequence lengths
:param gold_heads: [batch_size, seq_len] the gold-annotated heads; effective only during training,
    used to train the label classifier. If ``None`` , the predicted heads are fed to the label classifier instead
    Default: ``None``
:return dict: parsing results::
    arc_pred: [batch_size, seq_len, seq_len] arc prediction logits
    label_pred: [batch_size, seq_len, num_label] label prediction logits
    mask: [batch_size, seq_len] mask of the predictions
    head_pred: [batch_size, seq_len] the predicted heads, produced when ``gold_heads=None``
    pred1: [batch_size, seq_len] the predicted heads
    pred2: [batch_size, seq_len, num_label] label prediction logits
""" | |||
res = self(words1, words2, seq_len) | |||
output = {} | |||
output['arc_pred'] = res.pop('head_pred') | |||
_, label_pred = res.pop('label_pred').max(2) | |||
output['label_pred'] = label_pred | |||
output[C.OUTPUTS(0)] = res.pop(C.OUTPUTS(2)) | |||
_, label_pred = res.pop(C.OUTPUTS(1)).max(2) | |||
output[C.OUTPUTS(1)] = label_pred | |||
return output | |||
@@ -463,41 +459,44 @@ class ParserLoss(LossFunc): | |||
""" | |||
Computes the parser loss
:param arc_pred: [batch_size, seq_len, seq_len] arc prediction logits
:param label_pred: [batch_size, seq_len, num_label] label prediction logits
:param arc_true: [batch_size, seq_len] gold arc annotations
:param label_true: [batch_size, seq_len] gold label annotations
:param mask: [batch_size, seq_len] mask of the predictions
:param pred1: [batch_size, seq_len, seq_len] arc prediction logits
:param pred2: [batch_size, seq_len, num_label] label prediction logits
:param target1: [batch_size, seq_len] gold arc annotations
:param target2: [batch_size, seq_len] gold label annotations
:param seq_len: [batch_size, seq_len] the true target lengths
:return loss: scalar
""" | |||
def __init__(self, arc_pred=None, label_pred=None, arc_true=None, label_true=None): | |||
def __init__(self, pred1=None, pred2=None, | |||
target1=None, target2=None, | |||
seq_len=None): | |||
super(ParserLoss, self).__init__(BiaffineParser.loss, | |||
arc_pred=arc_pred, | |||
label_pred=label_pred, | |||
arc_true=arc_true, | |||
label_true=label_true) | |||
pred1=pred1, | |||
pred2=pred2, | |||
target1=target1, | |||
target2=target2, | |||
seq_len=seq_len) | |||
class ParserMetric(MetricBase): | |||
""" | |||
Evaluates parser performance
:param arc_pred: arc prediction logits
:param label_pred: label prediction logits
:param arc_true: gold arc annotations
:param label_true: gold label annotations
:param pred1: arc prediction logits
:param pred2: label prediction logits
:param target1: gold arc annotations
:param target2: gold label annotations
:param seq_len: sequence lengths
:return dict: evaluation results::
    UAS: accuracy of arc prediction, ignoring labels
    LAS: accuracy of predicting both arc and label
""" | |||
def __init__(self, arc_pred=None, label_pred=None, | |||
arc_true=None, label_true=None, seq_len=None): | |||
def __init__(self, pred1=None, pred2=None, | |||
target1=None, target2=None, seq_len=None): | |||
super().__init__() | |||
self._init_param_map(arc_pred=arc_pred, label_pred=label_pred, | |||
arc_true=arc_true, label_true=label_true, | |||
self._init_param_map(pred1=pred1, pred2=pred2, | |||
target1=target1, target2=target2, | |||
seq_len=seq_len) | |||
self.num_arc = 0 | |||
self.num_label = 0 | |||
@@ -509,17 +508,17 @@ class ParserMetric(MetricBase): | |||
self.num_sample = self.num_label = self.num_arc = 0 | |||
return res | |||
def evaluate(self, arc_pred, label_pred, arc_true, label_true, seq_len=None): | |||
def evaluate(self, pred1, pred2, target1, target2, seq_len=None): | |||
"""Evaluate the performance of prediction. | |||
""" | |||
if seq_len is None: | |||
seq_mask = arc_pred.new_ones(arc_pred.size(), dtype=torch.long) | |||
seq_mask = pred1.new_ones(pred1.size(), dtype=torch.long) | |||
else: | |||
seq_mask = seq_lens_to_masks(seq_len.long(), float=False).long() | |||
# mask out <root> tag | |||
seq_mask[:,0] = 0 | |||
head_pred_correct = (arc_pred == arc_true).long() * seq_mask | |||
label_pred_correct = (label_pred == label_true).long() * head_pred_correct | |||
head_pred_correct = (pred1 == target1).long() * seq_mask | |||
label_pred_correct = (pred2 == target2).long() * head_pred_correct | |||
self.num_arc += head_pred_correct.sum().item() | |||
self.num_label += label_pred_correct.sum().item() | |||
self.num_sample += seq_mask.sum().item() |
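From these counters, the metric values follow as simple ratios (our reading of the accumulation above)::

    # UAS: fraction of (unmasked) tokens whose head is predicted correctly
    # LAS: fraction whose head and label are both correct
    UAS = num_arc / num_sample
    LAS = num_label / num_sample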
@@ -2,7 +2,7 @@ import torch | |||
import torch.nn as nn | |||
import torch.nn.functional as F | |||
from fastNLP.modules.encoder.lstm import LSTM | |||
from ..modules.encoder.lstm import LSTM | |||
class Highway(nn.Module): | |||
@@ -5,9 +5,8 @@ import os | |||
import torch | |||
import torch.nn.functional as F | |||
import fastNLP | |||
import fastNLP.models.enas_utils as utils | |||
from fastNLP.models.enas_utils import Node | |||
from . import enas_utils as utils | |||
from .enas_utils import Node | |||
def _construct_dags(prev_nodes, activations, func_names, num_blocks): | |||
@@ -9,9 +9,8 @@ from torch import nn | |||
import torch.nn.functional as F | |||
from torch.autograd import Variable | |||
import fastNLP.models.enas_utils as utils | |||
from fastNLP.models.base_model import BaseModel | |||
import fastNLP.modules.encoder as encoder | |||
from . import enas_utils as utils | |||
from .base_model import BaseModel | |||
def _get_dropped_weights(w_raw, dropout_p, is_training): | |||
"""Drops out weights to implement DropConnect. | |||
@@ -1,6 +1,5 @@ | |||
# Code Modified from https://github.com/carpedm20/ENAS-pytorch | |||
import os | |||
import time | |||
from datetime import datetime | |||
from datetime import timedelta | |||
@@ -8,21 +7,19 @@ from datetime import timedelta | |||
import numpy as np | |||
import torch | |||
import math | |||
from torch import nn | |||
try: | |||
from tqdm.autonotebook import tqdm | |||
except: | |||
from fastNLP.core.utils import _pseudo_tqdm as tqdm | |||
from ..core.utils import _pseudo_tqdm as tqdm | |||
from fastNLP.core.batch import Batch | |||
from fastNLP.core.callback import CallbackManager, CallbackException | |||
from fastNLP.core.dataset import DataSet | |||
from fastNLP.core.utils import _CheckError | |||
from fastNLP.core.utils import _move_dict_value_to_device | |||
import fastNLP | |||
import fastNLP.models.enas_utils as utils | |||
from fastNLP.core.utils import _build_args | |||
from ..core.trainer import Trainer | |||
from ..core.batch import Batch | |||
from ..core.callback import CallbackManager, CallbackException | |||
from ..core.dataset import DataSet | |||
from ..core.utils import _move_dict_value_to_device | |||
from . import enas_utils as utils | |||
from ..core.utils import _build_args | |||
from torch.optim import Adam | |||
@@ -34,7 +31,7 @@ def _get_no_grad_ctx_mgr(): | |||
return torch.no_grad() | |||
class ENASTrainer(fastNLP.Trainer): | |||
class ENASTrainer(Trainer): | |||
"""A class to wrap training code.""" | |||
def __init__(self, train_data, model, controller, **kwargs): | |||
"""Constructor for training algorithm. | |||
@@ -4,21 +4,20 @@ from __future__ import print_function | |||
from collections import defaultdict | |||
import collections | |||
from datetime import datetime | |||
import os | |||
import json | |||
import numpy as np | |||
import torch | |||
from torch.autograd import Variable | |||
def detach(h): | |||
if type(h) == Variable: | |||
return Variable(h.data) | |||
else: | |||
return tuple(detach(v) for v in h) | |||
def get_variable(inputs, cuda=False, **kwargs): | |||
if type(inputs) in [list, np.ndarray]: | |||
inputs = torch.Tensor(inputs) | |||
@@ -28,10 +27,12 @@ def get_variable(inputs, cuda=False, **kwargs): | |||
out = Variable(inputs, **kwargs) | |||
return out | |||
def update_lr(optimizer, lr): | |||
for param_group in optimizer.param_groups: | |||
param_group['lr'] = lr | |||
Node = collections.namedtuple('Node', ['id', 'name']) | |||
@@ -48,9 +49,9 @@ def to_item(x): | |||
"""Converts x, possibly scalar and possibly tensor, to a Python scalar.""" | |||
if isinstance(x, (float, int)): | |||
return x | |||
if float(torch.__version__[0:3]) < 0.4: | |||
assert (x.dim() == 1) and (len(x) == 1) | |||
return x[0] | |||
return x.item() |
@@ -1,9 +1,9 @@ | |||
import torch | |||
from fastNLP.models.base_model import BaseModel | |||
from fastNLP.modules import decoder, encoder | |||
from fastNLP.modules.decoder.CRF import allowed_transitions | |||
from fastNLP.modules.utils import seq_mask | |||
from .base_model import BaseModel | |||
from ..modules import decoder, encoder | |||
from ..modules.decoder.CRF import allowed_transitions | |||
from ..modules.utils import seq_mask | |||
class SeqLabeling(BaseModel): | |||
@@ -1,11 +1,11 @@ | |||
import torch | |||
import torch.nn as nn | |||
from fastNLP.models.base_model import BaseModel | |||
from fastNLP.modules import decoder as Decoder | |||
from fastNLP.modules import encoder as Encoder | |||
from fastNLP.modules import aggregator as Aggregator | |||
from fastNLP.modules.utils import seq_mask | |||
from .base_model import BaseModel | |||
from ..modules import decoder as Decoder | |||
from ..modules import encoder as Encoder | |||
from ..modules import aggregator as Aggregator | |||
from ..modules.utils import seq_mask | |||
my_inf = 10e12 | |||
@@ -1,12 +1,12 @@ | |||
"""Star-Transformer 的 一个 Pytorch 实现. | |||
""" | |||
from fastNLP.modules.encoder.star_transformer import StarTransformer | |||
from fastNLP.core.utils import seq_lens_to_masks | |||
from ..modules.encoder.star_transformer import StarTransformer | |||
from ..core.utils import seq_lens_to_masks | |||
from ..modules.utils import get_embeddings | |||
from ..core.const import Const | |||
import torch | |||
from torch import nn | |||
import torch.nn.functional as F | |||
class StarTransEnc(nn.Module): | |||
@@ -107,7 +107,7 @@ class STSeqLabel(nn.Module): | |||
:param emb_dropout: dropout probability for the word embeddings. Default: 0.1 | |||
:param dropout: dropout probability for the model outside the word embeddings. Default: 0.1 | |||
""" | |||
def __init__(self, vocab_size, emb_dim, num_cls, | |||
def __init__(self, init_embed, num_cls, | |||
hidden_size=300, | |||
num_layers=4, | |||
num_head=8, | |||
@@ -117,8 +117,7 @@ class STSeqLabel(nn.Module): | |||
emb_dropout=0.1, | |||
dropout=0.1,): | |||
super(STSeqLabel, self).__init__() | |||
self.enc = StarTransEnc(vocab_size=vocab_size, | |||
emb_dim=emb_dim, | |||
self.enc = StarTransEnc(init_embed=init_embed, | |||
hidden_size=hidden_size, | |||
num_layers=num_layers, | |||
num_head=num_head, | |||
@@ -139,7 +138,7 @@ class STSeqLabel(nn.Module): | |||
nodes, _ = self.enc(words, mask) | |||
output = self.cls(nodes) | |||
output = output.transpose(1,2) # make hidden to be dim 1 | |||
return {'output': output} # [bsz, n_cls, seq_len] | |||
return {Const.OUTPUT: output} # [bsz, n_cls, seq_len] | |||
def predict(self, words, seq_len): | |||
""" | |||
@@ -149,8 +148,8 @@ class STSeqLabel(nn.Module): | |||
:return output: [batch, seq_len] the predicted class of each element in the sequence | |||
""" | |||
y = self.forward(words, seq_len) | |||
_, pred = y['output'].max(1) | |||
return {'output': pred} | |||
_, pred = y[Const.OUTPUT].max(1) | |||
return {Const.OUTPUT: pred} | |||
class STSeqCls(nn.Module): | |||
@@ -169,7 +168,7 @@ class STSeqCls(nn.Module): | |||
:param dropout: dropout probability for the model outside the word embeddings. Default: 0.1 | |||
""" | |||
def __init__(self, vocab_size, emb_dim, num_cls, | |||
def __init__(self, init_embed, num_cls, | |||
hidden_size=300, | |||
num_layers=4, | |||
num_head=8, | |||
@@ -179,8 +178,7 @@ class STSeqCls(nn.Module): | |||
emb_dropout=0.1, | |||
dropout=0.1,): | |||
super(STSeqCls, self).__init__() | |||
self.enc = StarTransEnc(vocab_size=vocab_size, | |||
emb_dim=emb_dim, | |||
self.enc = StarTransEnc(init_embed=init_embed, | |||
hidden_size=hidden_size, | |||
num_layers=num_layers, | |||
num_head=num_head, | |||
@@ -201,7 +199,7 @@ class STSeqCls(nn.Module): | |||
nodes, relay = self.enc(words, mask) | |||
y = 0.5 * (relay + nodes.max(1)[0]) | |||
output = self.cls(y) # [bsz, n_cls] | |||
return {'output': output} | |||
return {Const.OUTPUT: output} | |||
def predict(self, words, seq_len): | |||
""" | |||
@@ -211,8 +209,8 @@ class STSeqCls(nn.Module): | |||
:return output: [batch, num_cls] the predicted class of the sequence | |||
""" | |||
y = self.forward(words, seq_len) | |||
_, pred = y['output'].max(1) | |||
return {'output': pred} | |||
_, pred = y[Const.OUTPUT].max(1) | |||
return {Const.OUTPUT: pred} | |||
class STNLICls(nn.Module): | |||
@@ -231,7 +229,7 @@ class STNLICls(nn.Module): | |||
:param dropout: dropout probability for the model outside the word embeddings. Default: 0.1 | |||
""" | |||
def __init__(self, vocab_size, emb_dim, num_cls, | |||
def __init__(self, init_embed, num_cls, | |||
hidden_size=300, | |||
num_layers=4, | |||
num_head=8, | |||
@@ -241,8 +239,7 @@ class STNLICls(nn.Module): | |||
emb_dropout=0.1, | |||
dropout=0.1,): | |||
super(STNLICls, self).__init__() | |||
self.enc = StarTransEnc(vocab_size=vocab_size, | |||
emb_dim=emb_dim, | |||
self.enc = StarTransEnc(init_embed=init_embed, | |||
hidden_size=hidden_size, | |||
num_layers=num_layers, | |||
num_head=num_head, | |||
@@ -269,7 +266,7 @@ class STNLICls(nn.Module): | |||
y1 = enc(words1, mask1) | |||
y2 = enc(words2, mask2) | |||
output = self.cls(y1, y2) # [bsz, n_cls] | |||
return {'output': output} | |||
return {Const.OUTPUT: output} | |||
def predict(self, words1, words2, seq_len1, seq_len2): | |||
""" | |||
@@ -281,5 +278,5 @@ class STNLICls(nn.Module): | |||
:return output: [batch, num_cls] the predicted class probabilities | |||
""" | |||
y = self.forward(words1, words2, seq_len1, seq_len2) | |||
_, pred = y['output'].max(1) | |||
return {'output': pred} | |||
_, pred = y[Const.OUTPUT].max(1) | |||
return {Const.OUTPUT: pred} |
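The hunks above make two API changes throughout star_transformer.py: models are now built from a single `init_embed` argument (e.g. a `(vocab_size, emb_dim)` tuple) instead of separate `vocab_size`/`emb_dim` parameters, and output dicts are keyed by `Const.OUTPUT` rather than the string literal `'output'`. A minimal usage sketch under the new convention (the sizes here are made up)::

    import torch
    from fastNLP.core.const import Const
    from fastNLP.models.star_transformer import STSeqCls

    # init_embed replaces the old vocab_size/emb_dim pair
    model = STSeqCls(init_embed=(100, 50), num_cls=5)

    words = torch.randint(1, 100, (4, 10), dtype=torch.long)  # [batch, seq_len]
    seq_len = torch.full((4,), 10, dtype=torch.long)          # every sequence has length 10

    out = model(words, seq_len)
    print(out[Const.OUTPUT].shape)  # results are keyed by Const.OUTPUT now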
@@ -4,10 +4,10 @@ import torch | |||
import torch.nn.functional as F | |||
from torch import nn | |||
from fastNLP.modules.dropout import TimestepDropout | |||
from fastNLP.modules.utils import mask_softmax | |||
from ..dropout import TimestepDropout | |||
from ..utils import mask_softmax | |||
from fastNLP.modules.utils import initial_parameter | |||
from ..utils import initial_parameter | |||
class Attention(torch.nn.Module): | |||
@@ -1,17 +1,12 @@ | |||
# python: 3.6 | |||
# encoding: utf-8 | |||
import torch | |||
import torch.nn as nn | |||
class MaxPool(nn.Module): | |||
"""Max-pooling模块。""" | |||
def __init__( | |||
self, stride=None, padding=0, dilation=1, dimension=1, kernel_size=None, | |||
return_indices=False, ceil_mode=False | |||
): | |||
def __init__(self, stride=None, padding=0, dilation=1, dimension=1, kernel_size=None, | |||
return_indices=False, ceil_mode=False): | |||
""" | |||
:param stride: stride of the pooling window. Default: kernel_size | |||
:param padding: padding to apply. Default: 0 | |||
@@ -30,7 +25,7 @@ class MaxPool(nn.Module): | |||
self.kernel_size = kernel_size | |||
self.return_indices = return_indices | |||
self.ceil_mode = ceil_mode | |||
def forward(self, x): | |||
if self.dimension == 1: | |||
pooling = nn.MaxPool1d( | |||
@@ -57,10 +52,11 @@ class MaxPool(nn.Module): | |||
class MaxPoolWithMask(nn.Module): | |||
"""带mask矩阵的1维max pooling""" | |||
def __init__(self): | |||
super(MaxPoolWithMask, self).__init__() | |||
self.inf = 10e12 | |||
def forward(self, tensor, mask, dim=1): | |||
""" | |||
:param torch.FloatTensor tensor: [batch_size, seq_len, channels] the input tensor | |||
@@ -75,11 +71,11 @@ class MaxPoolWithMask(nn.Module): | |||
class KMaxPool(nn.Module): | |||
"""K max-pooling module.""" | |||
def __init__(self, k=1): | |||
super(KMaxPool, self).__init__() | |||
self.k = k | |||
def forward(self, x): | |||
""" | |||
:param torch.Tensor x: [N, C, L] the input tensor | |||
@@ -92,12 +88,12 @@ class KMaxPool(nn.Module): | |||
class AvgPool(nn.Module): | |||
"""1-d average pooling module.""" | |||
def __init__(self, stride=None, padding=0): | |||
super(AvgPool, self).__init__() | |||
self.stride = stride | |||
self.padding = padding | |||
def forward(self, x): | |||
""" | |||
:param torch.Tensor x: [N, C, L] the input tensor | |||
@@ -117,7 +113,7 @@ class MeanPoolWithMask(nn.Module): | |||
def __init__(self): | |||
super(MeanPoolWithMask, self).__init__() | |||
self.inf = 10e12 | |||
def forward(self, tensor, mask, dim=1): | |||
""" | |||
:param torch.FloatTensor tensor: [batch_size, seq_len, channels] the input tensor | |||
@@ -127,7 +123,3 @@ class MeanPoolWithMask(nn.Module): | |||
""" | |||
masks = mask.view(mask.size(0), mask.size(1), -1).float() | |||
return torch.sum(tensor * masks.float(), dim=dim) / torch.sum(masks.float(), dim=1) | |||
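The masked pooling modules above take a per-token mask alongside the input; inside forward the mask is broadcast over the channel dimension, so padded positions are excluded from the statistic. A small shape sketch, assuming MeanPoolWithMask as defined above::

    import torch

    pool = MeanPoolWithMask()
    x = torch.randn(2, 5, 8)               # [batch_size, seq_len, channels]
    mask = torch.tensor([[1, 1, 1, 0, 0],  # 1 marks real tokens, 0 marks padding
                         [1, 1, 1, 1, 1]])
    out = pool(x, mask)                    # [batch_size, channels], mean over unmasked steps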
@@ -1,8 +1,8 @@ | |||
import torch | |||
from torch import nn | |||
from fastNLP.modules.utils import initial_parameter | |||
from fastNLP.modules.decoder.utils import log_sum_exp | |||
from ..utils import initial_parameter | |||
from ..decoder.utils import log_sum_exp | |||
def seq_len_to_byte_mask(seq_lens): | |||
@@ -1,7 +1,7 @@ | |||
import torch | |||
import torch.nn as nn | |||
from fastNLP.modules.utils import initial_parameter | |||
from ..utils import initial_parameter | |||
class MLP(nn.Module): | |||
@@ -1,7 +1,7 @@ | |||
import torch | |||
from torch import nn | |||
from fastNLP.modules.utils import initial_parameter | |||
from ..utils import initial_parameter | |||
# from torch.nn.init import xavier_uniform | |||
@@ -5,7 +5,7 @@ import torch | |||
import torch.nn as nn | |||
import torch.nn.functional as F | |||
from fastNLP.modules.utils import initial_parameter | |||
from ..utils import initial_parameter | |||
class ConvMaxpool(nn.Module): | |||
@@ -1,5 +1,5 @@ | |||
import torch.nn as nn | |||
from fastNLP.modules.utils import get_embeddings | |||
from ..utils import get_embeddings | |||
class Embedding(nn.Embedding): | |||
"""Embedding组件. 可以通过self.num_embeddings获取词表大小; self.embedding_dim获取embedding的维度""" | |||
@@ -1,6 +1,6 @@ | |||
import torch.nn as nn | |||
from fastNLP.modules.utils import initial_parameter | |||
from ..utils import initial_parameter | |||
class Linear(nn.Module): | |||
@@ -5,7 +5,7 @@ import torch | |||
import torch.nn as nn | |||
import torch.nn.utils.rnn as rnn | |||
from fastNLP.modules.utils import initial_parameter | |||
from ..utils import initial_parameter | |||
class LSTM(nn.Module): | |||
@@ -3,7 +3,7 @@ | |||
import torch | |||
import torch.nn as nn | |||
from torch.nn.utils.rnn import PackedSequence, pack_padded_sequence, pad_packed_sequence | |||
from fastNLP.modules.utils import initial_parameter | |||
from ..utils import initial_parameter | |||
try: | |||
from torch import flip | |||
@@ -142,13 +142,12 @@ class TestCase1(unittest.TestCase): | |||
def test_sequential_batch(self): | |||
batch_size = 32 | |||
pause_seconds = 0.01 | |||
num_samples = 1000 | |||
dataset = generate_fake_dataset(num_samples) | |||
batch = Batch(dataset, batch_size=batch_size, sampler=SequentialSampler()) | |||
for batch_x, batch_y in batch: | |||
time.sleep(pause_seconds) | |||
pass | |||
""" | |||
def test_multi_workers_batch(self): | |||
@@ -3,7 +3,7 @@ import unittest | |||
import numpy as np | |||
import torch | |||
from fastNLP.core.callback import EchoCallback, EarlyStopCallback, GradientClipCallback, LRScheduler, ControlC, \ | |||
from fastNLP.core.callback import EarlyStopCallback, GradientClipCallback, LRScheduler, ControlC, \ | |||
LRFinder, \ | |||
TensorboardCallback | |||
from fastNLP.core.dataset import DataSet | |||
@@ -132,6 +132,19 @@ class TestAccuracyMetric(unittest.TestCase): | |||
return | |||
self.fail("No exception was raised.") | |||
def test_seq_len(self): | |||
N = 256 | |||
seq_len = torch.zeros(N).long() | |||
seq_len[0] = 2 | |||
pred = {'pred': torch.ones(N, 2)} | |||
target = {'target': torch.ones(N, 2), 'seq_len': seq_len} | |||
metric = AccuracyMetric() | |||
metric(pred_dict=pred, target_dict=target) | |||
self.assertDictEqual(metric.get_metric(), {'acc': 1.}) | |||
seq_len[1:] = 1 | |||
metric(pred_dict=pred, target_dict=target) | |||
self.assertDictEqual(metric.get_metric(), {'acc': 1.}) | |||
class SpanF1PreRecMetric(unittest.TestCase): | |||
def test_case1(self): | |||
from fastNLP.core.metrics import _bmes_tag_to_spans | |||
@@ -1,7 +1,7 @@ | |||
import unittest | |||
from fastNLP.io.dataset_loader import Conll2003Loader, PeopleDailyCorpusLoader, \ | |||
CSVLoader, SNLILoader | |||
CSVLoader, SNLILoader, JsonLoader | |||
class TestDatasetLoader(unittest.TestCase): | |||
@@ -24,3 +24,8 @@ class TestDatasetLoader(unittest.TestCase): | |||
def test_SNLILoader(self): | |||
ds = SNLILoader().load('test/data_for_tests/sample_snli.jsonl') | |||
assert len(ds) == 3 | |||
def test_JsonLoader(self): | |||
ds = JsonLoader().load('test/data_for_tests/sample_snli.jsonl') | |||
assert len(ds) == 3 | |||
@@ -0,0 +1,151 @@ | |||
""" | |||
This module makes it convenient to smoke-test models. | |||
If your model does text classification, sequence labeling, or natural language inference (NLI), it can be tested with this module directly. | |||
If the model does not fall into those categories, you can still prepare your own fake data and set a loss and metric for testing. | |||
The tests here only guarantee that a model can be trained and evaluated with fastNLP; they do not measure actual model performance. | |||
Example:: | |||
# import the ALL-CAPS variables... | |||
from model_runner import * | |||
# test a text classification model | |||
init_emb = (VOCAB_SIZE, 50) | |||
model = SomeModel(init_emb, num_cls=NUM_CLS) | |||
RUNNER.run_model_with_task(TEXT_CLS, model) | |||
# a sequence labeling model | |||
RUNNER.run_model_with_task(POS_TAGGING, model) | |||
# an NLI model | |||
RUNNER.run_model_with_task(NLI, model) | |||
# a custom model | |||
RUNNER.run_model(model, data=get_mydata(), | |||
loss=Myloss(), metrics=Mymetric()) | |||
""" | |||
from fastNLP import Trainer, Tester, DataSet | |||
from fastNLP import AccuracyMetric | |||
from fastNLP import CrossEntropyLoss | |||
from fastNLP.core.const import Const as C | |||
from random import randrange | |||
VOCAB_SIZE = 100 | |||
NUM_CLS = 100 | |||
MAX_LEN = 10 | |||
N_SAMPLES = 100 | |||
N_EPOCHS = 1 | |||
BATCH_SIZE = 5 | |||
TEXT_CLS = 'text_cls' | |||
POS_TAGGING = 'pos_tagging' | |||
NLI = 'nli' | |||
class ModelRunner(): | |||
def gen_seq(self, length, vocab_size): | |||
"""generate fake sequence indexes with given length""" | |||
# reserve 0 for padding | |||
return [randrange(1, vocab_size) for _ in range(length)] | |||
def gen_var_seq(self, max_len, vocab_size): | |||
"""generate fake sequence indexes in variant length""" | |||
length = randrange(3, max_len) # at least 3 words in a seq | |||
return self.gen_seq(length, vocab_size) | |||
def prepare_text_classification_data(self): | |||
index = 'index' | |||
ds = DataSet({index: list(range(N_SAMPLES))}) | |||
ds.apply_field(lambda x: self.gen_var_seq(MAX_LEN, VOCAB_SIZE), | |||
field_name=index, new_field_name=C.INPUT, | |||
is_input=True) | |||
ds.apply_field(lambda x: randrange(NUM_CLS), | |||
field_name=index, new_field_name=C.TARGET, | |||
is_target=True) | |||
ds.apply_field(len, C.INPUT, C.INPUT_LEN, | |||
is_input=True) | |||
return ds | |||
def prepare_pos_tagging_data(self): | |||
index = 'index' | |||
ds = DataSet({index: list(range(N_SAMPLES))}) | |||
ds.apply_field(lambda x: self.gen_var_seq(MAX_LEN, VOCAB_SIZE), | |||
field_name=index, new_field_name=C.INPUT, | |||
is_input=True) | |||
ds.apply_field(lambda x: self.gen_seq(len(x), NUM_CLS), | |||
field_name=C.INPUT, new_field_name=C.TARGET, | |||
is_target=True) | |||
ds.apply_field(len, C.INPUT, C.INPUT_LEN, | |||
is_input=True, is_target=True) | |||
return ds | |||
def prepare_nli_data(self): | |||
index = 'index' | |||
ds = DataSet({index: list(range(N_SAMPLES))}) | |||
ds.apply_field(lambda x: self.gen_var_seq(MAX_LEN, VOCAB_SIZE), | |||
field_name=index, new_field_name=C.INPUTS(0), | |||
is_input=True) | |||
ds.apply_field(lambda x: self.gen_var_seq(MAX_LEN, VOCAB_SIZE), | |||
field_name=index, new_field_name=C.INPUTS(1), | |||
is_input=True) | |||
ds.apply_field(lambda x: randrange(NUM_CLS), | |||
field_name=index, new_field_name=C.TARGET, | |||
is_target=True) | |||
ds.apply_field(len, C.INPUTS(0), C.INPUT_LENS(0), | |||
is_input=True, is_target=True) | |||
ds.apply_field(len, C.INPUTS(1), C.INPUT_LENS(1), | |||
is_input=True, is_target=True) | |||
ds.set_input(C.INPUTS(0), C.INPUTS(1)) | |||
ds.set_target(C.TARGET) | |||
return ds | |||
def run_text_classification(self, model, data=None): | |||
if data is None: | |||
data = self.prepare_text_classification_data() | |||
loss = CrossEntropyLoss(pred=C.OUTPUT, target=C.TARGET) | |||
metric = AccuracyMetric(pred=C.OUTPUT, target=C.TARGET) | |||
self.run_model(model, data, loss, metric) | |||
def run_pos_tagging(self, model, data=None): | |||
if data is None: | |||
data = self.prepare_pos_tagging_data() | |||
loss = CrossEntropyLoss(pred=C.OUTPUT, target=C.TARGET, padding_idx=0) | |||
metric = AccuracyMetric(pred=C.OUTPUT, target=C.TARGET, seq_len=C.INPUT_LEN) | |||
self.run_model(model, data, loss, metric) | |||
def run_nli(self, model, data=None): | |||
if data is None: | |||
data = self.prepare_nli_data() | |||
loss = CrossEntropyLoss(pred=C.OUTPUT, target=C.TARGET) | |||
metric = AccuracyMetric(pred=C.OUTPUT, target=C.TARGET) | |||
self.run_model(model, data, loss, metric) | |||
def run_model(self, model, data, loss, metrics): | |||
"""run a model, test if it can run with fastNLP""" | |||
print('testing model:', model.__class__.__name__) | |||
tester = Tester(data=data, model=model, metrics=metrics, | |||
batch_size=BATCH_SIZE, verbose=0) | |||
before_train = tester.test() | |||
trainer = Trainer(model=model, train_data=data, dev_data=None, | |||
n_epochs=N_EPOCHS, batch_size=BATCH_SIZE, | |||
loss=loss, | |||
save_path=None, | |||
use_tqdm=False) | |||
trainer.train(load_best_model=False) | |||
after_train = tester.test() | |||
for metric_name, v1 in before_train.items(): | |||
assert metric_name in after_train | |||
# # at least we can be sure the model params changed, even if we don't know the performance | |||
# v2 = after_train[metric_name] | |||
# assert v1 != v2 | |||
def run_model_with_task(self, task, model): | |||
"""run a model with certain task""" | |||
TASKS = { | |||
TEXT_CLS: self.run_text_classification, | |||
POS_TAGGING: self.run_pos_tagging, | |||
NLI: self.run_nli, | |||
} | |||
assert task in TASKS | |||
TASKS[task](model) | |||
RUNNER = ModelRunner() |
@@ -2,90 +2,33 @@ import unittest | |||
import fastNLP | |||
from fastNLP.models.biaffine_parser import BiaffineParser, ParserLoss, ParserMetric | |||
data_file = """ | |||
1 The _ DET DT _ 3 det _ _ | |||
2 new _ ADJ JJ _ 3 amod _ _ | |||
3 rate _ NOUN NN _ 6 nsubj _ _ | |||
4 will _ AUX MD _ 6 aux _ _ | |||
5 be _ VERB VB _ 6 cop _ _ | |||
6 payable _ ADJ JJ _ 0 root _ _ | |||
7 mask _ ADJ JJ _ 6 punct _ _ | |||
8 mask _ ADJ JJ _ 6 punct _ _ | |||
9 cents _ NOUN NNS _ 4 nmod _ _ | |||
10 from _ ADP IN _ 12 case _ _ | |||
11 seven _ NUM CD _ 12 nummod _ _ | |||
12 cents _ NOUN NNS _ 4 nmod _ _ | |||
13 a _ DET DT _ 14 det _ _ | |||
14 share _ NOUN NN _ 12 nmod:npmod _ _ | |||
15 . _ PUNCT . _ 4 punct _ _ | |||
1 The _ DET DT _ 3 det _ _ | |||
2 new _ ADJ JJ _ 3 amod _ _ | |||
3 rate _ NOUN NN _ 6 nsubj _ _ | |||
4 will _ AUX MD _ 6 aux _ _ | |||
5 be _ VERB VB _ 6 cop _ _ | |||
6 payable _ ADJ JJ _ 0 root _ _ | |||
7 Feb. _ PROPN NNP _ 6 nmod:tmod _ _ | |||
8 15 _ NUM CD _ 7 nummod _ _ | |||
9 . _ PUNCT . _ 6 punct _ _ | |||
1 A _ DET DT _ 3 det _ _ | |||
2 record _ NOUN NN _ 3 compound _ _ | |||
3 date _ NOUN NN _ 7 nsubjpass _ _ | |||
4 has _ AUX VBZ _ 7 aux _ _ | |||
5 n't _ PART RB _ 7 neg _ _ | |||
6 been _ AUX VBN _ 7 auxpass _ _ | |||
7 set _ VERB VBN _ 0 root _ _ | |||
8 . _ PUNCT . _ 7 punct _ _ | |||
""" | |||
def init_data(): | |||
ds = fastNLP.DataSet() | |||
v = {'words1': fastNLP.Vocabulary(), | |||
'words2': fastNLP.Vocabulary(), | |||
'label_true': fastNLP.Vocabulary()} | |||
data = [] | |||
for line in data_file.split('\n'): | |||
line = line.split() | |||
if len(line) == 0 and len(data) > 0: | |||
data = list(zip(*data)) | |||
ds.append(fastNLP.Instance(words1=data[1], | |||
words2=data[4], | |||
arc_true=data[6], | |||
label_true=data[7])) | |||
data = [] | |||
elif len(line) > 0: | |||
data.append(line) | |||
for name in ['words1', 'words2', 'label_true']: | |||
ds.apply(lambda x: ['<st>'] + list(x[name]), new_field_name=name) | |||
ds.apply(lambda x: v[name].add_word_lst(x[name])) | |||
for name in ['words1', 'words2', 'label_true']: | |||
ds.apply(lambda x: [v[name].to_index(w) for w in x[name]], new_field_name=name) | |||
ds.apply(lambda x: [0] + list(map(int, x['arc_true'])), new_field_name='arc_true') | |||
ds.apply(lambda x: len(x['words1']), new_field_name='seq_len') | |||
ds.set_input('words1', 'words2', 'seq_len', flag=True) | |||
ds.set_target('arc_true', 'label_true', 'seq_len', flag=True) | |||
return ds, v['words1'], v['words2'], v['label_true'] | |||
from .model_runner import * | |||
def prepare_parser_data(): | |||
index = 'index' | |||
ds = DataSet({index: list(range(N_SAMPLES))}) | |||
ds.apply_field(lambda x: RUNNER.gen_var_seq(MAX_LEN, VOCAB_SIZE), | |||
field_name=index, new_field_name=C.INPUTS(0), | |||
is_input=True) | |||
ds.apply_field(lambda x: RUNNER.gen_seq(len(x), NUM_CLS), | |||
field_name=C.INPUTS(0), new_field_name=C.INPUTS(1), | |||
is_input=True) | |||
# target1 is the heads, which should be in range(0, len(words)) | |||
ds.apply_field(lambda x: RUNNER.gen_seq(len(x), len(x)), | |||
field_name=C.INPUTS(0), new_field_name=C.TARGETS(0), | |||
is_target=True) | |||
ds.apply_field(lambda x: RUNNER.gen_seq(len(x), NUM_CLS), | |||
field_name=C.INPUTS(0), new_field_name=C.TARGETS(1), | |||
is_target=True) | |||
ds.apply_field(len, field_name=C.INPUTS(0), new_field_name=C.INPUT_LEN, | |||
is_input=True, is_target=True) | |||
return ds | |||
class TestBiaffineParser(unittest.TestCase): | |||
def test_train(self): | |||
ds, v1, v2, v3 = init_data() | |||
model = BiaffineParser(word_vocab_size=len(v1), word_emb_dim=30, | |||
pos_vocab_size=len(v2), pos_emb_dim=30, | |||
num_label=len(v3), encoder='var-lstm') | |||
trainer = fastNLP.Trainer(model=model, train_data=ds, dev_data=ds, | |||
loss=ParserLoss(), metrics=ParserMetric(), metric_key='UAS', | |||
batch_size=1, validate_every=10, | |||
n_epochs=10, use_cuda=False, use_tqdm=False) | |||
trainer.train(load_best_model=False) | |||
if __name__ == '__main__': | |||
unittest.main() | |||
model = BiaffineParser(init_embed=(VOCAB_SIZE, 30), | |||
pos_vocab_size=VOCAB_SIZE, pos_emb_dim=30, | |||
num_label=NUM_CLS, encoder='var-lstm') | |||
ds = prepare_parser_data() | |||
RUNNER.run_model(model, ds, loss=ParserLoss(), metrics=ParserMetric()) |
@@ -0,0 +1,16 @@ | |||
from .model_runner import * | |||
from fastNLP.models.star_transformer import STNLICls, STSeqCls, STSeqLabel | |||
# add Star-Transformer tests for the 3 kinds of tasks. | |||
def test_cls(): | |||
model = STSeqCls((VOCAB_SIZE, 100), NUM_CLS, dropout=0) | |||
RUNNER.run_model_with_task(TEXT_CLS, model) | |||
def test_nli(): | |||
model = STNLICls((VOCAB_SIZE, 100), NUM_CLS, dropout=0) | |||
RUNNER.run_model_with_task(NLI, model) | |||
def test_seq_label(): | |||
model = STSeqLabel((VOCAB_SIZE, 100), NUM_CLS, dropout=0) | |||
RUNNER.run_model_with_task(POS_TAGGING, model) |