@@ -0,0 +1,16 @@ | |||||
.gitignore | |||||
.DS_Store | |||||
.ipynb_checkpoints | |||||
*.pyc | |||||
__pycache__ | |||||
*.swp | |||||
.vscode/ | |||||
.idea/** | |||||
caches | |||||
# fitlog | |||||
.fitlog | |||||
logs/ | |||||
.fitconfig |
@@ -8,7 +8,7 @@ install: | |||||
- pip install pytest-cov | - pip install pytest-cov | ||||
# command to run tests | # command to run tests | ||||
script: | script: | ||||
- pytest --cov=./ | |||||
- pytest --cov=./ test/ | |||||
after_success: | after_success: | ||||
- bash <(curl -s https://codecov.io/bash) | - bash <(curl -s https://codecov.io/bash) |
@@ -56,6 +56,7 @@ fastNLP 在 :mod:`~fastNLP.models` 模块中内置了如 :class:`~fastNLP.models | |||||
快速入门 <user/quickstart> | 快速入门 <user/quickstart> | ||||
详细指南 <user/tutorial_one> | 详细指南 <user/tutorial_one> | ||||
科研指南 <user/with_fitlog> | 科研指南 <user/with_fitlog> | ||||
注释语法 <user/example> | |||||
API 文档 | API 文档 | ||||
------------- | ------------- | ||||
@@ -0,0 +1,104 @@ | |||||
====== | |||||
大标题 | |||||
====== | |||||
.. note:: | |||||
中文标题需要符号的数量至少是中文字数的两倍 | |||||
.. warning:: | |||||
符号的数量只可以多,不可以少。 | |||||
小标题1 | |||||
########### | |||||
小标题2 | |||||
********* | |||||
小标题3(正常使用) | |||||
======================== | |||||
小标题4 | |||||
------------------- | |||||
参考 http://docutils.sourceforge.net/docs/user/rst/quickref.html | |||||
常见语法 | |||||
============ | |||||
*emphasis* | |||||
**strong** | |||||
`text` | |||||
``inline literal`` | |||||
http://docutils.sf.net/ 孤立的网址会自动生成链接 | |||||
显示为特定的文字的链接 `sohu <http://www.sohu.com>`_ | |||||
突出显示的 | |||||
上面文字 | |||||
正常缩进 | |||||
形成段落
特殊模块 | |||||
============ | |||||
选项会自动识别 | |||||
-v An option | |||||
-o file Same with value | |||||
--delta A long option | |||||
--delta=len Same with value | |||||
图片 | |||||
.. image:: ../figures/procedures.PNG | |||||
:height: 200 | |||||
:width: 560 | |||||
:scale: 50 | |||||
:alt: alternate text | |||||
:align: center | |||||
显示一个冒号的代码块:: | |||||
中间要空一行 | |||||
:: | |||||
不显示冒号的代码块 | |||||
.. code-block:: python | |||||
:linenos: | |||||
:emphasize-lines: 1,3 | |||||
print("专业的代码块") | |||||
print("") | |||||
print("有行号和高亮") | |||||
数学块 | |||||
.. math:: | |||||
2Na + 2H_2O = 2NaOH + H_2 \uparrow
各种连接 | |||||
=========== | |||||
:doc:`/user/with_fitlog` | |||||
:mod:`~fastNLP.core.batch` | |||||
:class:`~fastNLP.Batch` | |||||
~表示只显示最后一项
:meth:`fastNLP.DataSet.apply` | |||||
@@ -12,6 +12,7 @@ from queue import Empty, Full | |||||
import numpy as np | import numpy as np | ||||
import torch | import torch | ||||
import torch.multiprocessing as mp | import torch.multiprocessing as mp | ||||
from numbers import Number | |||||
from .sampler import RandomSampler | from .sampler import RandomSampler | ||||
@@ -78,8 +79,10 @@ class Batch(object): | |||||
for field_name, field in self.dataset.get_all_fields().items(): | for field_name, field in self.dataset.get_all_fields().items(): | ||||
if field.is_target or field.is_input: | if field.is_target or field.is_input: | ||||
batch = field.get(indices) | batch = field.get(indices) | ||||
if not self.as_numpy and field.padder is not None: | |||||
batch = _to_tensor(batch, field.dtype) | |||||
if not self.as_numpy and \ | |||||
field.dtype is not None and \ | |||||
issubclass(field.dtype, Number) and not isinstance(batch, torch.Tensor): | |||||
batch = _to_tensor(batch) | |||||
if field.is_target: | if field.is_target: | ||||
batch_y[field_name] = batch | batch_y[field_name] = batch | ||||
if field.is_input: | if field.is_input: | ||||
@@ -174,12 +177,12 @@ class Batch(object): | |||||
# print('iter done') | # print('iter done') | ||||
def _to_tensor(batch, dtype): | |||||
def _to_tensor(batch): | |||||
try: | try: | ||||
if dtype in (int, np.int8, np.int16, np.int32, np.int64): | |||||
batch = torch.LongTensor(batch) | |||||
if dtype in (float, np.float32, np.float64): | |||||
batch = torch.FloatTensor(batch) | |||||
if issubclass(batch.dtype.type, np.floating): | |||||
batch = torch.as_tensor(batch).float() # 默认使用float32 | |||||
else: | |||||
batch = torch.as_tensor(batch) # 复用内存地址,避免复制 | |||||
except: | except: | ||||
pass | pass | ||||
return batch | return batch |
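The rewritten `_to_tensor` no longer takes an explicit dtype: it inspects the numpy array it receives, turns floating-point data into float32 tensors, and otherwise relies on `torch.as_tensor`, which reuses the array's memory instead of copying. A minimal standalone sketch of that behaviour (the local name `to_tensor` is illustrative, not part of the patch):

    import numpy as np
    import torch

    def to_tensor(batch):
        # mirror of the patched helper: float arrays -> float32, others kept as-is
        try:
            if issubclass(batch.dtype.type, np.floating):
                return torch.as_tensor(batch).float()   # defaults to float32
            return torch.as_tensor(batch)               # shares memory where possible
        except AttributeError:
            return batch                                 # plain Python lists fall through

    print(to_tensor(np.array([1.5, 2.5])).dtype)   # torch.float32
    print(to_tensor(np.array([1, 2])).dtype)       # torch.int64 on most platforms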
@@ -438,26 +438,29 @@ class EarlyStopCallback(Callback): | |||||
class FitlogCallback(Callback): | class FitlogCallback(Callback): | ||||
""" | """ | ||||
该callback将loss和progress自动写入到fitlog中; 如果Trainer有dev的数据,将自动把dev的结果写入到log中; 同时还支持传入 | |||||
一个(或多个)test数据集进行测试(只有在trainer具有dev时才能使用),每次在dev上evaluate之后会在这些数据集上验证一下。 | |||||
并将验证结果写入到fitlog中。这些数据集的结果是根据dev上最好的结果报道的,即如果dev在第3个epoch取得了最佳,则 | |||||
fitlog中记录的关于这些数据集的结果就是来自第三个epoch的结果。 | |||||
别名: :class:`fastNLP.FitlogCallback` :class:`fastNLP.core.callback.FitlogCallback` | |||||
该callback可将loss和progress写入到fitlog中; 如果Trainer有dev的数据,将自动把dev的结果写入到log中; 同时还支持传入 | |||||
一个(或多个)test数据集进行测试(只有在trainer具有dev时才能使用),每次在dev上evaluate之后会在这些数据集上验证一下。 | |||||
并将验证结果写入到fitlog中。这些数据集的结果是根据dev上最好的结果报道的,即如果dev在第3个epoch取得了最佳,则 | |||||
fitlog中记录的关于这些数据集的结果就是来自第三个epoch的结果。 | |||||
:param DataSet,dict(DataSet) data: 传入DataSet对象,会使用多个Trainer中的metric对数据进行验证。如果需要传入多个 | :param DataSet,dict(DataSet) data: 传入DataSet对象,会使用多个Trainer中的metric对数据进行验证。如果需要传入多个 | ||||
DataSet请通过dict的方式传入,dict的key将作为对应dataset的name传递给fitlog。若tester不为None时,data需要通过 | DataSet请通过dict的方式传入,dict的key将作为对应dataset的name传递给fitlog。若tester不为None时,data需要通过 | ||||
dict的方式传入。如果仅传入DataSet, 则被命名为test | dict的方式传入。如果仅传入DataSet, 则被命名为test | ||||
:param Tester tester: Tester对象,将在on_valid_end时调用。tester中的DataSet会被称为`test`
:param int verbose: 是否在终端打印内容,0不打印 | |||||
:param int log_loss_every: 多少个step记录一次loss(记录的是这几个batch的loss平均值),如果数据集较大建议将该值设置得 | |||||
大一些,不然会导致log文件巨大。默认为0, 即不要记录loss。 | |||||
:param int verbose: 是否在终端打印evaluation的结果,0不打印。 | |||||
:param bool log_exception: fitlog是否记录发生的exception信息 | :param bool log_exception: fitlog是否记录发生的exception信息 | ||||
""" | """ | ||||
# 还没有被导出到 fastNLP 层 | |||||
# 别名: :class:`fastNLP.FitlogCallback` :class:`fastNLP.core.callback.FitlogCallback` | |||||
def __init__(self, data=None, tester=None, verbose=0, log_exception=False): | |||||
def __init__(self, data=None, tester=None, log_loss_every=0, verbose=0, log_exception=False): | |||||
super().__init__() | super().__init__() | ||||
self.datasets = {} | self.datasets = {} | ||||
self.testers = {} | self.testers = {} | ||||
self._log_exception = log_exception | self._log_exception = log_exception | ||||
assert isinstance(log_loss_every, int) and log_loss_every>=0 | |||||
if tester is not None: | if tester is not None: | ||||
assert isinstance(tester, Tester), "Only fastNLP.Tester allowed." | assert isinstance(tester, Tester), "Only fastNLP.Tester allowed." | ||||
assert isinstance(data, dict) or data is None, "If tester is not None, only dict[DataSet] allowed for data." | assert isinstance(data, dict) or data is None, "If tester is not None, only dict[DataSet] allowed for data." | ||||
@@ -477,7 +480,9 @@ class FitlogCallback(Callback): | |||||
raise TypeError("data receives dict[DataSet] or DataSet object.") | raise TypeError("data receives dict[DataSet] or DataSet object.") | ||||
self.verbose = verbose | self.verbose = verbose | ||||
self._log_loss_every = log_loss_every | |||||
self._avg_loss = 0 | |||||
def on_train_begin(self): | def on_train_begin(self): | ||||
if (len(self.datasets) > 0 or len(self.testers) > 0) and self.trainer.dev_data is None: | if (len(self.datasets) > 0 or len(self.testers) > 0) and self.trainer.dev_data is None: | ||||
raise RuntimeError("Trainer has no dev data, you cannot pass extra data to do evaluation.") | raise RuntimeError("Trainer has no dev data, you cannot pass extra data to do evaluation.") | ||||
@@ -490,8 +495,12 @@ class FitlogCallback(Callback): | |||||
fitlog.add_progress(total_steps=self.n_steps) | fitlog.add_progress(total_steps=self.n_steps) | ||||
def on_backward_begin(self, loss): | def on_backward_begin(self, loss): | ||||
fitlog.add_loss(loss.item(), name='loss', step=self.step, epoch=self.epoch) | |||||
if self._log_loss_every>0: | |||||
self._avg_loss += loss.item() | |||||
if self.step%self._log_loss_every==0: | |||||
fitlog.add_loss(self._avg_loss/self._log_loss_every, name='loss', step=self.step, epoch=self.epoch) | |||||
self._avg_loss = 0 | |||||
def on_valid_end(self, eval_result, metric_key, optimizer, better_result): | def on_valid_end(self, eval_result, metric_key, optimizer, better_result): | ||||
if better_result: | if better_result: | ||||
eval_result = deepcopy(eval_result) | eval_result = deepcopy(eval_result) | ||||
@@ -518,7 +527,7 @@ class FitlogCallback(Callback): | |||||
def on_exception(self, exception): | def on_exception(self, exception): | ||||
fitlog.finish(status=1) | fitlog.finish(status=1) | ||||
if self._log_exception: | if self._log_exception: | ||||
fitlog.add_other(str(exception), name='except_info') | |||||
fitlog.add_other(repr(exception), name='except_info') | |||||
class LRScheduler(Callback): | class LRScheduler(Callback): | ||||
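A minimal usage sketch of the updated callback, assuming `train_data`, `dev_data`, `test_data` (fastNLP DataSet objects), `model` and `metric` are already defined; averaging the loss over `log_loss_every` steps keeps the fitlog file small on large datasets:

    import fitlog
    from fastNLP import Trainer
    from fastNLP.core.callback import FitlogCallback

    fitlog.set_log_dir('logs/')                        # fitlog directory prepared beforehand
    cb = FitlogCallback(data={'test': test_data},      # evaluated after every dev evaluation
                        log_loss_every=100,            # log the mean loss of every 100 steps
                        verbose=1)
    trainer = Trainer(train_data, model, dev_data=dev_data, metrics=metric,
                      callbacks=[cb])
    trainer.train()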
@@ -285,7 +285,8 @@ from .field import AutoPadder | |||||
from .field import FieldArray | from .field import FieldArray | ||||
from .instance import Instance | from .instance import Instance | ||||
from .utils import _get_func_signature | from .utils import _get_func_signature | ||||
from .field import AppendToTargetOrInputException | |||||
from .field import SetInputOrTargetException | |||||
class DataSet(object): | class DataSet(object): | ||||
""" | """ | ||||
@@ -422,7 +423,7 @@ class DataSet(object): | |||||
if len(self.field_arrays) == 0: | if len(self.field_arrays) == 0: | ||||
# DataSet has no field yet | # DataSet has no field yet | ||||
for name, field in instance.fields.items(): | for name, field in instance.fields.items(): | ||||
field = field.tolist() if isinstance(field, np.ndarray) else field | |||||
# field = field.tolist() if isinstance(field, np.ndarray) else field | |||||
self.field_arrays[name] = FieldArray(name, [field]) # 第一个样本,必须用list包装起来 | self.field_arrays[name] = FieldArray(name, [field]) # 第一个样本,必须用list包装起来 | ||||
else: | else: | ||||
if len(self.field_arrays) != len(instance.fields): | if len(self.field_arrays) != len(instance.fields): | ||||
@@ -431,7 +432,11 @@ class DataSet(object): | |||||
.format(len(self.field_arrays), len(instance.fields))) | .format(len(self.field_arrays), len(instance.fields))) | ||||
for name, field in instance.fields.items(): | for name, field in instance.fields.items(): | ||||
assert name in self.field_arrays | assert name in self.field_arrays | ||||
self.field_arrays[name].append(field) | |||||
try: | |||||
self.field_arrays[name].append(field) | |||||
except AppendToTargetOrInputException as e: | |||||
print(f"Cannot append to field:{name}.") | |||||
raise e | |||||
def add_fieldarray(self, field_name, fieldarray): | def add_fieldarray(self, field_name, fieldarray): | ||||
""" | """ | ||||
@@ -565,7 +570,11 @@ class DataSet(object): | |||||
assert isinstance(flag, bool), "Only bool type supported." | assert isinstance(flag, bool), "Only bool type supported." | ||||
for name in field_names: | for name in field_names: | ||||
if name in self.field_arrays: | if name in self.field_arrays: | ||||
self.field_arrays[name].is_target = flag | |||||
try: | |||||
self.field_arrays[name].is_target = flag | |||||
except SetInputOrTargetException as e: | |||||
print(f"Cannot set field:{name} as target.") | |||||
raise e | |||||
else: | else: | ||||
raise KeyError("{} is not a valid field name.".format(name)) | raise KeyError("{} is not a valid field name.".format(name)) | ||||
@@ -581,7 +590,11 @@ class DataSet(object): | |||||
""" | """ | ||||
for name in field_names: | for name in field_names: | ||||
if name in self.field_arrays: | if name in self.field_arrays: | ||||
self.field_arrays[name].is_input = flag | |||||
try: | |||||
self.field_arrays[name].is_input = flag | |||||
except SetInputOrTargetException as e: | |||||
print(f"Cannot set field:{name} as input.") | |||||
raise e | |||||
else: | else: | ||||
raise KeyError("{} is not a valid field name.".format(name)) | raise KeyError("{} is not a valid field name.".format(name)) | ||||
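With the wrapped exceptions, a type mismatch now reports the offending field name before re-raising. An illustrative sketch (values are made up):

    from fastNLP import DataSet, Instance

    ds = DataSet()
    ds.append(Instance(words=['a', 'b'], target=0))
    ds.set_input('words')                        # words is now checked: list of str, dim 1
    ds.append(Instance(words=[1, 2], target=1))  # raises AppendToTargetOrInputException,
                                                 # after printing "Cannot append to field:words."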
@@ -1,251 +1,162 @@ | |||||
""" | |||||
field模块实现了 FieldArray 和若干 Padder。 FieldArray 是 :class:`~fastNLP.DataSet` 中一列的存储方式, | |||||
原理部分请参考 :doc:`fastNLP.core.dataset` | |||||
""" | |||||
__all__ = [ | |||||
"FieldArray", | |||||
"Padder", | |||||
"AutoPadder", | |||||
"EngChar2DPadder" | |||||
] | |||||
from copy import deepcopy | |||||
from numbers import Number | |||||
import torch | |||||
import numpy as np | import numpy as np | ||||
from typing import Any | |||||
from abc import abstractmethod | |||||
from copy import deepcopy | |||||
class FieldArray(object): | |||||
""" | |||||
别名::class:`fastNLP.FieldArray` :class:`fastNLP.core.field.FieldArray` | |||||
FieldArray 是用于保存 :class:`~fastNLP.DataSet` 中一个field的类型。 | |||||
:param str name: FieldArray的名称 | |||||
:param list,numpy.ndarray content: 列表的元素可以为list,int,float, | |||||
:param bool is_target: 这个field是否是一个target field。 | |||||
:param bool is_input: 这个field是否是一个input field。 | |||||
:param padder: :class:`~fastNLP.Padder` 类型。赋值给fieldarray的padder的对象会被deepcopy一份,需要修改padder参数必须通过 | |||||
fieldarray.set_pad_val()。默认为None,即使用 :class:`~fastNLP.AutoPadder` 。 | |||||
:param bool ignore_type: 是否忽略该field的type,一般如果这个field不需要转为torch.FloatTensor或torch.LongTensor, | |||||
就可以设置为True。具体意义请参考 :class:`~fastNLP.DataSet` 。 | |||||
""" | |||||
def __init__(self, name, content, is_target=None, is_input=None, padder=None, ignore_type=False): | |||||
class SetInputOrTargetException(Exception): | |||||
def __init__(self, msg, index=None, field_name=None): | |||||
super().__init__(msg) | |||||
self.msg = msg | |||||
self.index = index # 标示在哪个数据遭遇到问题了 | |||||
self.field_name = field_name # 标示当前field的名称 | |||||
class AppendToTargetOrInputException(Exception): | |||||
def __init__(self, msg, index=None, field_name=None): | |||||
super().__init__(msg) | |||||
self.msg = msg | |||||
self.index = index # 标示在哪个数据遭遇到问题了 | |||||
self.field_name = field_name # 标示当前field的名称 | |||||
class FieldArray: | |||||
def __init__(self, name, content, is_target=False, is_input=False, padder=None, ignore_type=False): | |||||
if len(content)==0: | |||||
raise RuntimeError("Empty fieldarray is not allowed.") | |||||
_content = content | |||||
try: | |||||
_content = list(_content) | |||||
except BaseException as e: | |||||
print(f"Cannot convert content(of type:{type(content)}) into list.") | |||||
raise e | |||||
self.name = name | self.name = name | ||||
if isinstance(content, list): | |||||
# 如果DataSet使用dict初始化, content 可能是二维list/二维array/三维list | |||||
# 如果DataSet使用list of Instance 初始化, content可能是 [list]/[array]/[2D list] | |||||
for idx, item in enumerate(content): | |||||
# 这是使用list of Instance 初始化时第一个样本:FieldArray(name, [field]) | |||||
# 将[np.array] 转化为 list of list | |||||
# 也可以支持[array, array, array]的情况 | |||||
if isinstance(item, np.ndarray): | |||||
content[idx] = content[idx].tolist() | |||||
elif isinstance(content, np.ndarray): | |||||
content = content.tolist() # convert np.ndarray into 2-D list | |||||
else: | |||||
raise TypeError("content in FieldArray can only be list or numpy.ndarray, got {}.".format(type(content))) | |||||
if len(content) == 0: | |||||
raise RuntimeError("Cannot initialize FieldArray with empty list.") | |||||
self.content = content # 1维 或 2维 或 3维 list, 形状可能不对齐 | |||||
self.content_dim = None # 表示content是多少维的list | |||||
self.content = _content | |||||
self._ignore_type = ignore_type | |||||
# 根据input的情况设置input,target等 | |||||
self._cell_ndim = None # 多少维度 | |||||
self.dtype = None # 最内层的element都是什么类型的 | |||||
self._is_input = False | |||||
self._is_target = False | |||||
if is_input: | |||||
self.is_input = is_input | |||||
if is_target: | |||||
self.is_target = is_target | |||||
if padder is None: | if padder is None: | ||||
padder = AutoPadder(pad_val=0) | padder = AutoPadder(pad_val=0) | ||||
else: | else: | ||||
assert isinstance(padder, Padder), "padder must be of type Padder." | |||||
assert isinstance(padder, Padder), "padder must be of type fastNLP.Padder." | |||||
padder = deepcopy(padder) | padder = deepcopy(padder) | ||||
self.set_padder(padder) | self.set_padder(padder) | ||||
self.ignore_type = ignore_type | |||||
self.BASIC_TYPES = (int, float, str) # content中可接受的Python基本类型,这里没有np.array | |||||
self.pytype = None | |||||
self.dtype = None | |||||
self._is_input = None | |||||
self._is_target = None | |||||
if is_input is not None or is_target is not None: | |||||
self.is_input = is_input | |||||
self.is_target = is_target | |||||
def _set_dtype(self): | |||||
if self.ignore_type is False: | |||||
self.pytype = self._type_detection(self.content) | |||||
self.dtype = self._map_to_np_type(self.pytype) | |||||
@property | |||||
def ignore_type(self): | |||||
return self._ignore_type | |||||
@ignore_type.setter | |||||
def ignore_type(self, value): | |||||
if value: | |||||
self._cell_ndim = None | |||||
self.dtype = None | |||||
@property | @property | ||||
def is_input(self): | def is_input(self): | ||||
return self._is_input | return self._is_input | ||||
@is_input.setter | @is_input.setter | ||||
def is_input(self, value): | def is_input(self, value): | ||||
""" | """ | ||||
当 field_array.is_input = True / False 时被调用 | 当 field_array.is_input = True / False 时被调用 | ||||
""" | """ | ||||
if value is True: | |||||
self._set_dtype() | |||||
# 如果(value为True)且(_is_input和_is_target都是False)且(ignore_type为False) | |||||
if value is True and \ | |||||
self._is_target is False and \ | |||||
self._ignore_type is False: | |||||
self._check_dtype_and_ndim() | |||||
if value is False and self._is_target is False: | |||||
self.dtype = None | |||||
self._cell_ndim = None | |||||
self._is_input = value | self._is_input = value | ||||
@property | @property | ||||
def is_target(self): | def is_target(self): | ||||
return self._is_target | return self._is_target | ||||
@is_target.setter | @is_target.setter | ||||
def is_target(self, value): | def is_target(self, value): | ||||
""" | """ | ||||
当 field_array.is_target = True / False 时被调用 | 当 field_array.is_target = True / False 时被调用 | ||||
""" | """ | ||||
if value is True: | |||||
self._set_dtype() | |||||
if value is True and \ | |||||
self._is_input is False and \ | |||||
self._ignore_type is False: | |||||
self._check_dtype_and_ndim() | |||||
if value is False and self._is_input is False: | |||||
self.dtype = None | |||||
self._cell_ndim = None | |||||
self._is_target = value | self._is_target = value | ||||
def _type_detection(self, content): | |||||
""" | |||||
当该field被设置为is_input或者is_target时被调用 | |||||
def _check_dtype_and_ndim(self): | |||||
""" | """ | ||||
if len(content) == 0: | |||||
raise RuntimeError("Empty list in Field {}.".format(self.name)) | |||||
type_set = set([type(item) for item in content]) | |||||
if list in type_set: | |||||
if len(type_set) > 1: | |||||
# list 跟 非list 混在一起 | |||||
raise RuntimeError("Mixed data types in Field {}: {}".format(self.name, list(type_set))) | |||||
# >1维list | |||||
inner_type_set = set() | |||||
for l in content: | |||||
[inner_type_set.add(type(obj)) for obj in l] | |||||
if list not in inner_type_set: | |||||
# 二维list | |||||
self.content_dim = 2 | |||||
return self._basic_type_detection(inner_type_set) | |||||
else: | |||||
if len(inner_type_set) == 1: | |||||
# >2维list | |||||
inner_inner_type_set = set() | |||||
for _2d_list in content: | |||||
for _1d_list in _2d_list: | |||||
[inner_inner_type_set.add(type(obj)) for obj in _1d_list] | |||||
if list in inner_inner_type_set: | |||||
raise RuntimeError("FieldArray cannot handle 4-D or more-D list.") | |||||
# 3维list | |||||
self.content_dim = 3 | |||||
return self._basic_type_detection(inner_inner_type_set) | |||||
else: | |||||
# list 跟 非list 混在一起 | |||||
raise RuntimeError("Mixed data types in Field {}: {}".format(self.name, list(inner_type_set))) | |||||
else: | |||||
# 一维list | |||||
for content_type in type_set: | |||||
if content_type not in self.BASIC_TYPES: | |||||
raise RuntimeError("Unexpected data type in Field '{}'. Expect one of {}. Got {}.".format( | |||||
self.name, self.BASIC_TYPES, content_type)) | |||||
self.content_dim = 1 | |||||
return self._basic_type_detection(type_set) | |||||
def _basic_type_detection(self, type_set): | |||||
检查当前content所有的element是否是同一个类型,且是否每个元素具有相同的维度。通过的话,设置_cell_ndim与_ele_type属性;没有 | |||||
通过将直接报错. | |||||
:return: | |||||
""" | """ | ||||
:param type_set: a set of Python types | |||||
:return: one of self.BASIC_TYPES | |||||
cell_0 = self.content[0] | |||||
index = 0 | |||||
try: | |||||
type_0, dim_0 = _get_ele_type_and_dim(cell_0) | |||||
for cell in self.content[1:]: | |||||
index += 1 | |||||
type_i, dim_i = _get_ele_type_and_dim(cell) | |||||
if type_i!=type_0: | |||||
raise SetInputOrTargetException("Type:{} in index {} is different from the first element with type:{}." | |||||
".".format(type_i, index, type_0)) | |||||
if dim_0!=dim_i: | |||||
raise SetInputOrTargetException("Dimension:{} in index {} is different from the first element with " | |||||
"dimension:{}.".format(dim_i, index, dim_0)) | |||||
self._cell_ndim = dim_0 | |||||
self.dtype = type_0 | |||||
except SetInputOrTargetException as e: | |||||
e.index = index | |||||
raise e | |||||
def append(self, val:Any): | |||||
""" | |||||
:param val: 把该val append到fieldarray。 | |||||
:return: | |||||
""" | """ | ||||
if len(type_set) == 1: | |||||
return type_set.pop() | |||||
elif len(type_set) == 2: | |||||
# 有多个basic type; 可能需要up-cast | |||||
if float in type_set and int in type_set: | |||||
# up-cast int to float | |||||
return float | |||||
else: | |||||
# str 跟 int 或者 float 混在一起 | |||||
raise RuntimeError("Mixed data types in Field {}: {}".format(self.name, list(type_set))) | |||||
if (self._is_target or self._is_input) and self._ignore_type is False: | |||||
type_, dim_ = _get_ele_type_and_dim(val) | |||||
if self.dtype!=type_: | |||||
raise AppendToTargetOrInputException(f"Value(type:{type_}) are of different types with " | |||||
f"previous values(type:{self.dtype}).") | |||||
if self._cell_ndim!=dim_: | |||||
raise AppendToTargetOrInputException(f"Value(dim:{dim_}) are of different dimensions with " | |||||
f"previous values(dim:{self._cell_ndim}).") | |||||
self.content.append(val) | |||||
else: | else: | ||||
# str, int, float混在一起 | |||||
raise RuntimeError("Mixed data types in Field {}: {}".format(self.name, list(type_set))) | |||||
def _1d_list_check(self, val): | |||||
"""如果不是1D list就报错 | |||||
""" | |||||
type_set = set((type(obj) for obj in val)) | |||||
if any(obj not in self.BASIC_TYPES for obj in type_set): | |||||
raise ValueError("Mixed data types in Field {}: {}".format(self.name, list(type_set))) | |||||
self._basic_type_detection(type_set) | |||||
# otherwise: _basic_type_detection will raise error | |||||
return True | |||||
def _2d_list_check(self, val): | |||||
"""如果不是2D list 就报错 | |||||
""" | |||||
type_set = set(type(obj) for obj in val) | |||||
if list(type_set) != [list]: | |||||
raise ValueError("Mixed data types in Field {}: {}".format(self.name, type_set)) | |||||
inner_type_set = set() | |||||
for l in val: | |||||
for obj in l: | |||||
inner_type_set.add(type(obj)) | |||||
self._basic_type_detection(inner_type_set) | |||||
return True | |||||
@staticmethod | |||||
def _map_to_np_type(basic_type): | |||||
type_mapping = {int: np.int64, float: np.float64, str: np.str, np.ndarray: np.ndarray} | |||||
return type_mapping[basic_type] | |||||
def __repr__(self): | |||||
return "FieldArray {}: {}".format(self.name, self.content.__repr__()) | |||||
def append(self, val): | |||||
"""将val append到这个field的尾部。如果这个field已经被设置为input或者target,则在append之前会检查该类型是否与已有 | |||||
的内容是匹配的。 | |||||
:param Any val: 需要append的值。 | |||||
""" | |||||
if self.ignore_type is False: | |||||
if isinstance(val, list): | |||||
pass | |||||
elif isinstance(val, tuple): # 确保最外层是list | |||||
val = list(val) | |||||
elif isinstance(val, np.ndarray): | |||||
val = val.tolist() | |||||
elif any((isinstance(val, t) for t in self.BASIC_TYPES)): | |||||
pass | |||||
else: | |||||
raise RuntimeError( | |||||
"Unexpected data type {}. Should be list, np.array, or {}".format(type(val), self.BASIC_TYPES)) | |||||
if self.is_input is True or self.is_target is True: | |||||
if type(val) == list: | |||||
if len(val) == 0: | |||||
raise ValueError("Cannot append an empty list.") | |||||
if self.content_dim == 2 and self._1d_list_check(val): | |||||
# 1维list检查 | |||||
pass | |||||
elif self.content_dim == 3 and self._2d_list_check(val): | |||||
# 2维list检查 | |||||
pass | |||||
else: | |||||
raise RuntimeError( | |||||
"Dimension not matched: expect dim={}, got {}.".format(self.content_dim - 1, val)) | |||||
elif type(val) in self.BASIC_TYPES and self.content_dim == 1: | |||||
# scalar检查 | |||||
if type(val) == float and self.pytype == int: | |||||
self.pytype = float | |||||
self.dtype = self._map_to_np_type(self.pytype) | |||||
else: | |||||
raise RuntimeError( | |||||
"Unexpected data type {}. Should be list, np.array, or {}".format(type(val), self.BASIC_TYPES)) | |||||
self.content.append(val) | |||||
self.content.append(val) | |||||
def __getitem__(self, indices): | def __getitem__(self, indices): | ||||
return self.get(indices, pad=False) | return self.get(indices, pad=False) | ||||
def __setitem__(self, idx, val): | def __setitem__(self, idx, val): | ||||
assert isinstance(idx, int) | assert isinstance(idx, int) | ||||
if (self._is_target or self._is_input) and self.ignore_type is False: # 需要检测类型 | |||||
type_, dim_ = _get_ele_type_and_dim(val) | |||||
if self.dtype!=type_: | |||||
raise RuntimeError(f"Value(type:{type_}) are of different types with " | |||||
f"other values(type:{self.dtype}).") | |||||
if self._cell_ndim!=dim_: | |||||
raise RuntimeError(f"Value(dim:{dim_}) are of different dimensions with " | |||||
f"previous values(dim:{self._cell_ndim}).") | |||||
self.content[idx] = val | self.content[idx] = val | ||||
def get(self, indices, pad=True): | def get(self, indices, pad=True): | ||||
""" | """ | ||||
根据给定的indices返回内容 | 根据给定的indices返回内容 | ||||
@@ -257,14 +168,14 @@ class FieldArray(object): | |||||
if isinstance(indices, int): | if isinstance(indices, int): | ||||
return self.content[indices] | return self.content[indices] | ||||
if self.is_input is False and self.is_target is False: | if self.is_input is False and self.is_target is False: | ||||
raise RuntimeError("Please specify either is_input or is_target is True for {}".format(self.name)) | |||||
raise RuntimeError("Please specify either is_input or is_target to True for {}".format(self.name)) | |||||
contents = [self.content[i] for i in indices] | contents = [self.content[i] for i in indices] | ||||
if self.padder is None or pad is False: | if self.padder is None or pad is False: | ||||
return np.array(contents) | return np.array(contents) | ||||
else: | else: | ||||
return self.padder(contents, field_name=self.name, field_ele_dtype=self.dtype) | |||||
return self.padder(contents, field_name=self.name, field_ele_dtype=self.dtype, dim=self._cell_ndim) | |||||
def set_padder(self, padder): | def set_padder(self, padder): | ||||
""" | """ | ||||
设置padder,在这个field进行pad的时候用这个padder进行pad,如果为None则不进行pad。 | 设置padder,在这个field进行pad的时候用这个padder进行pad,如果为None则不进行pad。 | ||||
@@ -276,7 +187,7 @@ class FieldArray(object): | |||||
self.padder = deepcopy(padder) | self.padder = deepcopy(padder) | ||||
else: | else: | ||||
self.padder = None | self.padder = None | ||||
def set_pad_val(self, pad_val): | def set_pad_val(self, pad_val): | ||||
""" | """ | ||||
修改padder的pad_val. | 修改padder的pad_val. | ||||
@@ -286,7 +197,7 @@ class FieldArray(object): | |||||
if self.padder is not None: | if self.padder is not None: | ||||
self.padder.set_pad_val(pad_val) | self.padder.set_pad_val(pad_val) | ||||
return self | return self | ||||
def __len__(self): | def __len__(self): | ||||
""" | """ | ||||
Returns the size of FieldArray. | Returns the size of FieldArray. | ||||
@@ -294,7 +205,7 @@ class FieldArray(object): | |||||
:return int length: | :return int length: | ||||
""" | """ | ||||
return len(self.content) | return len(self.content) | ||||
def to(self, other): | def to(self, other): | ||||
""" | """ | ||||
将other的属性复制给本FieldArray(other必须为FieldArray类型). | 将other的属性复制给本FieldArray(other必须为FieldArray类型). | ||||
@@ -303,22 +214,63 @@ class FieldArray(object): | |||||
:param other: :class:`~fastNLP.FieldArray` 从哪个field拷贝属性 | :param other: :class:`~fastNLP.FieldArray` 从哪个field拷贝属性 | ||||
:return: :class:`~fastNLP.FieldArray` | :return: :class:`~fastNLP.FieldArray` | ||||
""" | """ | ||||
assert isinstance(other, FieldArray), "Only support FieldArray type, not {}.".format(type(other)) | |||||
assert isinstance(other, FieldArray), "Only supports fastNLP.FieldArray type, not {}.".format(type(other)) | |||||
self.ignore_type = other.ignore_type | |||||
self.is_input = other.is_input | self.is_input = other.is_input | ||||
self.is_target = other.is_target | self.is_target = other.is_target | ||||
self.padder = other.padder | self.padder = other.padder | ||||
self.ignore_type = other.ignore_type | |||||
return self | return self | ||||
def _is_iterable(content): | |||||
def _get_ele_type_and_dim(cell:Any, dim=0): | |||||
""" | |||||
识别cell的类别与dimension的数量 | |||||
numpy scalar type:https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.scalars.html | |||||
:param cell: | |||||
:param dim: | |||||
:return: | |||||
""" | |||||
if isinstance(cell, (str, Number, np.bool_)): | |||||
return type(cell), dim | |||||
elif isinstance(cell, list): | |||||
dim += 1 | |||||
res = [_get_ele_type_and_dim(cell_i, dim) for cell_i in cell] | |||||
types = set([i for i,j in res]) | |||||
dims = set([j for i,j in res]) | |||||
if len(types)>1: | |||||
raise SetInputOrTargetException("Mixed types detected: {}.".format(list(types))) | |||||
if len(dims)>1: | |||||
raise SetInputOrTargetException("Mixed dimension detected: {}.".format(list(dims))) | |||||
return types.pop(), dims.pop() | |||||
elif isinstance(cell, torch.Tensor): | |||||
return cell.dtype, cell.dim() + dim # 如果是torch.mean的结果是0 | |||||
elif isinstance(cell, np.ndarray): | |||||
if cell.dtype != np.dtype('O'): # 如果不是object的话说明是well-formatted的了 | |||||
return cell.dtype.type, cell.ndim + dim | |||||
# 否则需要继续往下iterate | |||||
dim += 1 | |||||
res = [_get_ele_type_and_dim(cell_i, dim) for cell_i in cell] | |||||
types = set([i for i,j in res]) | |||||
dims = set([j for i,j in res]) | |||||
if len(types)>1: | |||||
raise SetInputOrTargetException("Mixed types detected: {}.".format(list(types))) | |||||
if len(dims)>1: | |||||
raise SetInputOrTargetException("Mixed dimension detected: {}.".format(list(dims))) | |||||
return types.pop(), dims.pop() | |||||
else: # 包含tuple, set, dict以及其它的类型 | |||||
raise SetInputOrTargetException(f"Cannot process type:{type(cell)}.") | |||||
def _is_iterable(value): | |||||
# 检查是否是iterable的, duck typing | |||||
try: | try: | ||||
_ = (e for e in content) | |||||
except TypeError: | |||||
iter(value) | |||||
return True | |||||
except BaseException as e: | |||||
return False | return False | ||||
return True | |||||
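A few illustrative calls to the new helpers (private names, imported here only for demonstration), showing how element type and nesting depth are reported and how mixed content is rejected:

    import numpy as np
    from fastNLP.core.field import _get_ele_type_and_dim, SetInputOrTargetException

    print(_get_ele_type_and_dim(1))                 # (<class 'int'>, 0)
    print(_get_ele_type_and_dim([[1, 2], [3]]))     # (<class 'int'>, 2)
    print(_get_ele_type_and_dim(np.zeros((2, 3))))  # (<class 'numpy.float64'>, 2)
    try:
        _get_ele_type_and_dim([1, 'a'])             # int and str mixed inside one cell
    except SetInputOrTargetException as e:
        print(e.msg)                                # "Mixed types detected: ..."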
class Padder: | class Padder: | ||||
@@ -327,32 +279,35 @@ class Padder: | |||||
所有padder都需要继承这个类,并覆盖__call__方法。 | 所有padder都需要继承这个类,并覆盖__call__方法。 | ||||
用于对batch进行padding操作。传入的element是inplace的,即直接修改element可能导致数据变化,建议inplace修改之前deepcopy一份。 | 用于对batch进行padding操作。传入的element是inplace的,即直接修改element可能导致数据变化,建议inplace修改之前deepcopy一份。 | ||||
.. py:function:: __call__(self, contents, field_name, field_ele_dtype): | .. py:function:: __call__(self, contents, field_name, field_ele_dtype): | ||||
传入的是List内容。假设有以下的DataSet。 | 传入的是List内容。假设有以下的DataSet。 | ||||
:param list(Any) contents: 传入的element是inplace的,即直接修改element可能导致数据变化,建议inplace修改之前 | :param list(Any) contents: 传入的element是inplace的,即直接修改element可能导致数据变化,建议inplace修改之前 | ||||
deepcopy一份。 | deepcopy一份。 | ||||
:param str, field_name: field的名称。 | :param str, field_name: field的名称。 | ||||
:param np.int64,np.float64,np.str,None, field_ele_dtype: 该field的内层元素的类型。如果该field的ignore_type为True,该这个值为None。 | :param np.int64,np.float64,np.str,None, field_ele_dtype: 该field的内层元素的类型。如果该field的ignore_type为True,该这个值为None。 | ||||
:return: np.array([padded_element]) | :return: np.array([padded_element]) | ||||
""" | """ | ||||
def __init__(self, pad_val=0, **kwargs): | def __init__(self, pad_val=0, **kwargs): | ||||
self.pad_val = pad_val | self.pad_val = pad_val | ||||
def set_pad_val(self, pad_val): | def set_pad_val(self, pad_val): | ||||
self.pad_val = pad_val | self.pad_val = pad_val | ||||
def __call__(self, contents, field_name, field_ele_dtype): | |||||
@abstractmethod | |||||
def __call__(self, contents, field_name, field_ele_dtype, dim:int): | |||||
""" | """ | ||||
传入的是List内容。假设有以下的DataSet。 | 传入的是List内容。假设有以下的DataSet。 | ||||
:param list(Any) contents: 传入的element是inplace的,即直接修改element可能导致数据变化,建议inplace修改之前 | :param list(Any) contents: 传入的element是inplace的,即直接修改element可能导致数据变化,建议inplace修改之前 | ||||
deepcopy一份。 | deepcopy一份。 | ||||
:param str, field_name: field的名称。 | :param str, field_name: field的名称。 | ||||
:param np.int64,np.float64,np.str,None, field_ele_dtype: 该field的内层元素的类型。如果该field的ignore_type为True,该这个值为None。 | |||||
:param np.int64,np.float64,np.str,None, field_ele_dtype: 该field的内层元素的类型。如果该field的ignore_type为True, | |||||
该这个值为None。 | |||||
:param dim: 这个field的维度。当ignore_type为True时,该值为None | |||||
:return: np.array([padded_element]) | :return: np.array([padded_element]) | ||||
Example:: | Example:: | ||||
@@ -394,50 +349,87 @@ class AutoPadder(Padder): | |||||
根据contents的数据自动判定是否需要做padding。 | 根据contents的数据自动判定是否需要做padding。 | ||||
1 如果元素类型(元素类型是指field中最里层元素的数据类型, 可以通过FieldArray.dtype查看,比如['This', 'is', ...]的元素类 | 1 如果元素类型(元素类型是指field中最里层元素的数据类型, 可以通过FieldArray.dtype查看,比如['This', 'is', ...]的元素类 | ||||
型为np.str, [[1,2], ...]的元素类型为np.int64)的数据不为(np.int64, np.float64)则不会进行pad | |||||
型为str, [[1,2], ...]的元素类型为int)的数据不为数值类型则不会进行pad | |||||
2 如果元素类型为数值类型,比如np.int64, np.float64, int, float, torch.int64等 | |||||
2 如果元素类型为(np.int64, np.float64), | |||||
2.1 如果该field的内容为数值类型(包括int, float等),比如为seq_len, 则不进行padding | |||||
2.1 如果该field的内容为(np.int64, np.float64),比如为seq_len, 则不进行padding | |||||
2.2 如果该field的内容等价于一维list, 那么会将Batch中的List pad为一样长。 | |||||
2.2 如果该field的内容为List, 那么会将Batch中的List pad为一样长。若该List下还有里层的List需要padding,请使用其它padder。 | |||||
即如果Instance中field形如[1, 2, 3, ...],则可以pad;若为[[1,2], [3,4, ...]]则不能进行pad | |||||
2.3 如果该field的内容等价于二维list,那么会按照英语character padding的方式进行padding。如果是character padding建议使用 | |||||
:class: fastNLP.EngChar2DPadder. | |||||
2.4 如果该field的内容等价于三维list,则如果每个instance在每个维度上相等,会组成一个batch的tensor返回,这种情况应该是为图片 | |||||
的情况。 | |||||
3 其它情况不进行处理,返回一个np.array类型。 | |||||
""" | """ | ||||
def __init__(self, pad_val=0): | def __init__(self, pad_val=0): | ||||
""" | |||||
:param pad_val: int, padding的位置使用该index | |||||
""" | |||||
super().__init__(pad_val=pad_val) | super().__init__(pad_val=pad_val) | ||||
def _is_two_dimension(self, contents): | |||||
""" | |||||
判断contents是不是只有两个维度。[[1,2], [3]]是两个维度. [[[1,2], [3, 4, 5]], [[4,5]]]有三个维度 | |||||
:param contents: | |||||
:return: | |||||
""" | |||||
value = contents[0] | |||||
if isinstance(value, (np.ndarray, list)): | |||||
value = value[0] | |||||
if isinstance(value, (np.ndarray, list)): | |||||
return False | |||||
return True | |||||
return False | |||||
def __call__(self, contents, field_name, field_ele_dtype): | |||||
if not _is_iterable(contents[0]): | |||||
array = np.array([content for content in contents], dtype=field_ele_dtype) | |||||
elif field_ele_dtype in (np.int64, np.float64) and self._is_two_dimension(contents): | |||||
max_len = max([len(content) for content in contents]) | |||||
array = np.full((len(contents), max_len), self.pad_val, dtype=field_ele_dtype) | |||||
for i, content in enumerate(contents): | |||||
array[i][:len(content)] = content | |||||
elif field_ele_dtype is None: | |||||
array = np.array(contents) # 当ignore_type=True时,直接返回contents | |||||
else: # should only be str | |||||
array = np.array([content for content in contents]) | |||||
return array | |||||
def __call__(self, contents, field_name, field_ele_dtype, dim): | |||||
if field_ele_dtype: | |||||
if dim>3: | |||||
return np.array(contents) | |||||
if isinstance(field_ele_dtype, np.dtype) or field_ele_dtype in (float, int, bool, str): | |||||
if isinstance(field_ele_dtype, np.number) or field_ele_dtype in (float, int, bool): | |||||
if dim==0: | |||||
array = np.array(contents, dtype=field_ele_dtype) | |||||
elif dim==1: | |||||
max_len = max(map(len, contents)) | |||||
array = np.full((len(contents), max_len), self.pad_val, dtype=field_ele_dtype) | |||||
for i, content_i in enumerate(contents): | |||||
array[i, :len(content_i)] = content_i | |||||
elif dim==2: | |||||
max_len = max(map(len, contents)) | |||||
max_word_len = max([max([len(content_ii) for content_ii in content_i]) for | |||||
content_i in contents]) | |||||
array = np.full((len(contents), max_len, max_word_len), self.pad_val, dtype=field_ele_dtype) | |||||
for i, content_i in enumerate(contents): | |||||
for j, content_ii in enumerate(content_i): | |||||
array[i, j, :len(content_ii)] = content_ii | |||||
else: | |||||
shape = np.shape(contents) | |||||
if len(shape)==4: # 说明各dimension是相同的大小 | |||||
array = np.array(contents, dtype=field_ele_dtype) | |||||
else: | |||||
raise RuntimeError(f"Field:{field_name} has 3 dimensions, every sample should have the same shape.") | |||||
return array | |||||
return np.array(contents) | |||||
elif str(field_ele_dtype).startswith('torch'): | |||||
if dim==0: | |||||
tensor = torch.tensor(contents).to(field_ele_dtype) | |||||
elif dim==1: | |||||
max_len = max(map(len, contents)) | |||||
tensor = torch.full((len(contents), max_len), fill_value=self.pad_val, dtype=field_ele_dtype) | |||||
for i, content_i in enumerate(contents): | |||||
tensor[i, :len(content_i)] = torch.tensor(content_i) | |||||
elif dim==2: | |||||
max_len = max(map(len, contents)) | |||||
max_word_len = max([max([len(content_ii) for content_ii in content_i]) for | |||||
content_i in contents]) | |||||
tensor = torch.full((len(contents), max_len, max_word_len), fill_value=self.pad_val, | |||||
dtype=field_ele_dtype) | |||||
for i, content_i in enumerate(contents): | |||||
for j, content_ii in enumerate(content_i): | |||||
tensor[i, j, :len(content_ii)] = torch.tensor(content_ii) | |||||
else: | |||||
shapes = set([np.shape(content_i) for content_i in contents]) | |||||
if len(shapes)>1: | |||||
raise RuntimeError(f"Field:{field_name} has 3 dimensions, every sample should have the same shape.") | |||||
shape = shapes.pop() | |||||
if len(shape)==3: | |||||
tensor = torch.full([len(contents)]+list(shape), fill_value=self.pad_val, dtype=field_ele_dtype) | |||||
for i, content_i in enumerate(contents): | |||||
tensor[i] = torch.tensor(content_i, dtype=field_ele_dtype) | |||||
else: | |||||
raise RuntimeError(f"Field:{field_name} has 3 dimensions, every sample should have the same shape.") | |||||
return tensor | |||||
else: | |||||
return np.array(contents) # 不进行任何操作 | |||||
else: | |||||
return np.array(contents) | |||||
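A worked sketch of the numeric branches above (field names are illustrative): a batch of 1-dimensional int cells is padded to the longest length with `pad_val`, while 0-dimensional cells such as seq_len are stacked without padding:

    from fastNLP import AutoPadder

    padder = AutoPadder(pad_val=0)
    out = padder([[1, 2, 3], [4, 5]], field_name='words', field_ele_dtype=int, dim=1)
    print(out)
    # [[1 2 3]
    #  [4 5 0]]
    print(padder([3, 2], field_name='seq_len', field_ele_dtype=int, dim=0))   # [3 2]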
class EngChar2DPadder(Padder): | class EngChar2DPadder(Padder): | ||||
@@ -463,7 +455,7 @@ class EngChar2DPadder(Padder): | |||||
dataset.set_padder('chars', padder) # chars这个field的设置为了EnChar2DPadder | dataset.set_padder('chars', padder) # chars这个field的设置为了EnChar2DPadder | ||||
""" | """ | ||||
def __init__(self, pad_val=0, pad_length=0): | def __init__(self, pad_val=0, pad_length=0): | ||||
""" | """ | ||||
:param pad_val: int, pad的位置使用该index | :param pad_val: int, pad的位置使用该index | ||||
@@ -471,32 +463,10 @@ class EngChar2DPadder(Padder): | |||||
都pad或截取到该长度. | 都pad或截取到该长度. | ||||
""" | """ | ||||
super().__init__(pad_val=pad_val) | super().__init__(pad_val=pad_val) | ||||
self.pad_length = pad_length | self.pad_length = pad_length | ||||
def _exactly_three_dims(self, contents, field_name): | |||||
""" | |||||
检查传入的contents是否刚好是3维,如果不是3维就报错。理论上,第一个维度是batch,第二个维度是word,第三个维度是character | |||||
:param contents: | |||||
:param field_name: str | |||||
:return: | |||||
""" | |||||
if not isinstance(contents, list): | |||||
raise TypeError("contents should be a list, not {}.".format(type(contents))) | |||||
value = contents[0] | |||||
try: | |||||
value = value[0] | |||||
except: | |||||
raise ValueError("Field:{} only has one dimension.".format(field_name)) | |||||
try: | |||||
value = value[0] | |||||
except: | |||||
raise ValueError("Field:{} only has two dimensions.".format(field_name)) | |||||
if _is_iterable(value): | |||||
raise ValueError("Field:{} has more than 3 dimension.".format(field_name)) | |||||
def __call__(self, contents, field_name, field_ele_dtype): | |||||
def __call__(self, contents, field_name, field_ele_dtype, dim): | |||||
""" | """ | ||||
期望输入类似于 | 期望输入类似于 | ||||
[ | [ | ||||
@@ -510,24 +480,24 @@ class EngChar2DPadder(Padder): | |||||
:param field_ele_dtype | :param field_ele_dtype | ||||
:return: | :return: | ||||
""" | """ | ||||
if field_ele_dtype not in (np.int64, np.float64): | |||||
if field_ele_dtype not in (np.int64, np.float64, int, float): | |||||
raise TypeError('dtype of Field:{} should be np.int64 or np.float64 to do 2D padding, get {}.'.format( | raise TypeError('dtype of Field:{} should be np.int64 or np.float64 to do 2D padding, get {}.'.format( | ||||
field_name, field_ele_dtype | field_name, field_ele_dtype | ||||
)) | )) | ||||
self._exactly_three_dims(contents, field_name) | |||||
assert dim==2, f"Field:{field_name} has {dim}, EngChar2DPadder only supports input with 2 dimensions." | |||||
if self.pad_length < 1: | if self.pad_length < 1: | ||||
max_char_length = max(max([[len(char_lst) for char_lst in word_lst] for word_lst in contents])) | |||||
max_char_length = max([max(len(char_lst) for char_lst in word_lst) for word_lst in contents]) | |||||
else: | else: | ||||
max_char_length = self.pad_length | max_char_length = self.pad_length | ||||
max_sent_length = max(len(word_lst) for word_lst in contents) | max_sent_length = max(len(word_lst) for word_lst in contents) | ||||
batch_size = len(contents) | batch_size = len(contents) | ||||
dtype = type(contents[0][0][0]) | dtype = type(contents[0][0][0]) | ||||
padded_array = np.full((batch_size, max_sent_length, max_char_length), fill_value=self.pad_val, | padded_array = np.full((batch_size, max_sent_length, max_char_length), fill_value=self.pad_val, | ||||
dtype=dtype) | dtype=dtype) | ||||
for b_idx, word_lst in enumerate(contents): | for b_idx, word_lst in enumerate(contents): | ||||
for c_idx, char_lst in enumerate(word_lst): | for c_idx, char_lst in enumerate(word_lst): | ||||
chars = char_lst[:max_char_length] | chars = char_lst[:max_char_length] | ||||
padded_array[b_idx, c_idx, :len(chars)] = chars | padded_array[b_idx, c_idx, :len(chars)] = chars | ||||
return padded_array | return padded_array |
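The padder now trusts the `dim` reported by FieldArray instead of probing the nesting itself. A usage sketch with a two-sentence character field (each cell is a list of words, each word a list of character indices; values are illustrative):

    from fastNLP import EngChar2DPadder

    padder = EngChar2DPadder(pad_val=0)
    contents = [
        [[1, 2], [3, 4, 5]],   # sentence 1: two words
        [[6]],                 # sentence 2: one word
    ]
    out = padder(contents, field_name='chars', field_ele_dtype=int, dim=2)
    print(out.shape)           # (2, 2, 3): batch x max_sent_len x max_char_len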
@@ -438,7 +438,7 @@ def _bio_tag_to_spans(tags, ignore_labels=None): | |||||
class SpanFPreRecMetric(MetricBase): | class SpanFPreRecMetric(MetricBase): | ||||
""" | |||||
r""" | |||||
别名::class:`fastNLP.SpanFPreRecMetric` :class:`fastNLP.core.metrics.SpanFPreRecMetric` | 别名::class:`fastNLP.SpanFPreRecMetric` :class:`fastNLP.core.metrics.SpanFPreRecMetric` | ||||
在序列标注问题中,以span的方式计算F, pre, rec. | 在序列标注问题中,以span的方式计算F, pre, rec. | ||||
@@ -476,8 +476,8 @@ class SpanFPreRecMetric(MetricBase): | |||||
label的f1, pre, rec | label的f1, pre, rec | ||||
:param str f_type: 'micro'或'macro'. 'micro':通过先计算总体的TP,FN和FP的数量,再计算f, precision, recall; 'macro': | :param str f_type: 'micro'或'macro'. 'micro':通过先计算总体的TP,FN和FP的数量,再计算f, precision, recall; 'macro': | ||||
分布计算每个类别的f, precision, recall,然后做平均(各类别f的权重相同) | 分布计算每个类别的f, precision, recall,然后做平均(各类别f的权重相同) | ||||
:param float beta: f_beta分数,f_beta = (1 + beta^2)*(pre*rec)/(beta^2*pre + rec). 常用为beta=0.5, 1, 2. 若为0.5 | |||||
则精确率的权重高于召回率;若为1,则两者平等;若为2,则召回率权重高于精确率。 | |||||
:param float beta: f_beta分数, :math:`f_{beta} = \frac{(1 + {beta}^{2})*(pre*rec)}{({beta}^{2}*pre + rec)}` . | |||||
常用为beta=0.5, 1, 2. 若为0.5则精确率的权重高于召回率;若为1,则两者平等;若为2,则召回率权重高于精确率。 | |||||
""" | """ | ||||
def __init__(self, tag_vocab, pred=None, target=None, seq_len=None, encoding_type='bio', ignore_labels=None, | def __init__(self, tag_vocab, pred=None, target=None, seq_len=None, encoding_type='bio', ignore_labels=None, | ||||
@@ -699,17 +699,17 @@ def _pred_topk(y_prob, k=1): | |||||
class SQuADMetric(MetricBase): | class SQuADMetric(MetricBase): | ||||
""" | |||||
r""" | |||||
别名::class:`fastNLP.SQuADMetric` :class:`fastNLP.core.metrics.SQuADMetric` | 别名::class:`fastNLP.SQuADMetric` :class:`fastNLP.core.metrics.SQuADMetric` | ||||
SQuAD数据集metric | SQuAD数据集metric | ||||
:param pred1: 参数映射表中`pred1`的映射关系,None表示映射关系为`pred1`->`pred1` | |||||
:param pred2: 参数映射表中`pred2`的映射关系,None表示映射关系为`pred2`->`pred2` | |||||
:param target1: 参数映射表中`target1`的映射关系,None表示映射关系为`target1`->`target1` | |||||
:param target2: 参数映射表中`target2`的映射关系,None表示映射关系为`target2`->`target2` | |||||
:param float beta: f_beta分数,f_beta = (1 + beta^2)*(pre*rec)/(beta^2*pre + rec). 常用为beta=0.5, 1, 2. 若为0.5 | |||||
则精确率的权重高于召回率;若为1,则两者平等;若为2,则召回率权重高于精确率。 | |||||
:param pred1: 参数映射表中 `pred1` 的映射关系,None表示映射关系为 `pred1` -> `pred1` | |||||
:param pred2: 参数映射表中 `pred2` 的映射关系,None表示映射关系为 `pred2` -> `pred2` | |||||
:param target1: 参数映射表中 `target1` 的映射关系,None表示映射关系为 `target1` -> `target1` | |||||
:param target2: 参数映射表中 `target2` 的映射关系,None表示映射关系为 `target2` -> `target2` | |||||
:param float beta: f_beta分数, :math:`f_{beta} = \frac{(1 + {beta}^{2})*(pre*rec)}{({beta}^{2}*pre + rec)}` . | |||||
常用为beta=0.5, 1, 2. 若为0.5则精确率的权重高于召回率;若为1,则两者平等;若为2,则召回率权重高于精确率。 | |||||
:param bool right_open: right_open为true表示start跟end指针指向一个左闭右开区间,为false表示指向一个左闭右闭区间。 | :param bool right_open: right_open为true表示start跟end指针指向一个左闭右开区间,为false表示指向一个左闭右闭区间。 | ||||
:param bool print_predict_stat: True则输出预测答案是否为空与正确答案是否为空的统计信息, False则不输出 | :param bool print_predict_stat: True则输出预测答案是否为空与正确答案是否为空的统计信息, False则不输出 | ||||
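For reference, the f_beta formula written out as a small helper (not part of the patch; the epsilon only guards the pre = rec = 0 case):

    def f_beta(pre, rec, beta=1.0):
        # f_beta = (1 + beta^2) * pre * rec / (beta^2 * pre + rec)
        return (1 + beta ** 2) * pre * rec / (beta ** 2 * pre + rec + 1e-13)

    print(round(f_beta(0.8, 0.5, beta=1), 4))   # 0.6154, the usual F1
    print(round(f_beta(0.8, 0.5, beta=2), 4))   # 0.5405, recall weighted more heavily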
@@ -494,12 +494,15 @@ class Trainer(object): | |||||
self.callback_manager = CallbackManager(env={"trainer": self}, | self.callback_manager = CallbackManager(env={"trainer": self}, | ||||
callbacks=callbacks) | callbacks=callbacks) | ||||
def train(self, load_best_model=True): | |||||
def train(self, load_best_model=True, on_exception='auto'): | |||||
""" | """ | ||||
使用该函数使Trainer开始训练。 | 使用该函数使Trainer开始训练。 | ||||
:param bool load_best_model: 该参数只有在初始化提供了dev_data的情况下有效, | |||||
如果True, trainer将在返回之前重新加载dev表现最好的模型参数。 | |||||
:param bool load_best_model: 该参数只有在初始化提供了dev_data的情况下有效,如果True, trainer将在返回之前重新加载dev表现 | |||||
最好的模型参数。 | |||||
:param str on_exception: 在训练过程遭遇exception,并被 :py:class:Callback 的on_exception()处理后,是否继续抛出异常。 | |||||
支持'ignore','raise', 'auto': 'ignore'将捕获异常,写在Trainer.train()后面的代码将继续运行; 'raise'将异常抛出; | |||||
'auto'将ignore以下两种Exception: CallbackException与KeyboardInterrupt, raise其它exception. | |||||
:return dict: 返回一个字典类型的数据, | :return dict: 返回一个字典类型的数据, | ||||
内含以下内容:: | 内含以下内容:: | ||||
@@ -528,10 +531,16 @@ class Trainer(object): | |||||
self.callback_manager.on_train_begin() | self.callback_manager.on_train_begin() | ||||
self._train() | self._train() | ||||
self.callback_manager.on_train_end() | self.callback_manager.on_train_end() | ||||
except (CallbackException, KeyboardInterrupt) as e: | |||||
except BaseException as e: | |||||
self.callback_manager.on_exception(e) | self.callback_manager.on_exception(e) | ||||
if on_exception == 'auto': | |||||
if not isinstance(e, (CallbackException, KeyboardInterrupt)): | |||||
raise e | |||||
elif on_exception == 'raise': | |||||
raise e | |||||
if self.dev_data is not None and hasattr(self, 'best_dev_perf'): | |||||
if self.dev_data is not None and self.best_dev_perf is not None: | |||||
print( | print( | ||||
"\nIn Epoch:{}/Step:{}, got best dev performance:".format(self.best_dev_epoch, self.best_dev_step) + | "\nIn Epoch:{}/Step:{}, got best dev performance:".format(self.best_dev_epoch, self.best_dev_step) + | ||||
self.tester._format_eval_results(self.best_dev_perf), ) | self.tester._format_eval_results(self.best_dev_perf), ) | ||||
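A sketch of how the three `on_exception` modes behave, assuming `trainer` is an already configured fastNLP Trainer:

    # 'auto'   : swallow CallbackException / KeyboardInterrupt, re-raise everything else
    # 'ignore' : never re-raise; code after trainer.train() keeps running
    # 'raise'  : always re-raise once the callbacks' on_exception() has run
    try:
        trainer.train(load_best_model=True, on_exception='auto')
    except Exception as e:
        print(f"training aborted: {e!r}")   # only non-callback errors reach here under 'auto'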
@@ -3,7 +3,8 @@ utils模块实现了 fastNLP 内部和外部所需的很多工具。其中用户 | |||||
""" | """ | ||||
__all__ = [ | __all__ = [ | ||||
"cache_results", | "cache_results", | ||||
"seq_len_to_mask" | |||||
"seq_len_to_mask", | |||||
"Option", | |||||
] | ] | ||||
import _pickle | import _pickle | ||||
@@ -21,6 +22,32 @@ _CheckRes = namedtuple('_CheckRes', ['missing', 'unused', 'duplicated', 'require | |||||
'varargs']) | 'varargs']) | ||||
class Option(dict): | |||||
"""a dict can treat keys as attributes""" | |||||
def __getattr__(self, item): | |||||
try: | |||||
return self.__getitem__(item) | |||||
except KeyError: | |||||
raise AttributeError(item) | |||||
def __setattr__(self, key, value): | |||||
if key.startswith('__') and key.endswith('__'): | |||||
raise AttributeError(key) | |||||
self.__setitem__(key, value) | |||||
def __delattr__(self, item): | |||||
try: | |||||
self.pop(item) | |||||
except KeyError: | |||||
raise AttributeError(item) | |||||
def __getstate__(self): | |||||
return self | |||||
def __setstate__(self, state): | |||||
self.update(state) | |||||
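`Option` behaves like a dict whose keys double as attributes, and the `__getstate__`/`__setstate__` pair keeps it picklable. A quick sketch using the patched module:

    from fastNLP.core.utils import Option

    opt = Option(max_size=10000, min_freq=2)
    opt.padding = '<pad>'                    # attribute writes go through __setitem__
    print(opt['max_size'], opt.min_freq)     # 10000 2
    print('padding' in opt)                  # True: still an ordinary dict underneath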
def _prepare_cache_filepath(filepath): | def _prepare_cache_filepath(filepath): | ||||
""" | """ | ||||
检查filepath是否可以作为合理的cache文件. 如果可以的话,会自动创造路径 | 检查filepath是否可以作为合理的cache文件. 如果可以的话,会自动创造路径 | ||||
@@ -1,11 +1,26 @@ | |||||
__all__ = [ | __all__ = [ | ||||
"Vocabulary" | |||||
"Vocabulary", | |||||
"VocabularyOption", | |||||
] | ] | ||||
from functools import wraps | from functools import wraps | ||||
from collections import Counter | from collections import Counter | ||||
from .dataset import DataSet | from .dataset import DataSet | ||||
from .utils import Option | |||||
class VocabularyOption(Option): | |||||
def __init__(self, | |||||
max_size=None, | |||||
min_freq=None, | |||||
padding='<pad>', | |||||
unknown='<unk>'): | |||||
super().__init__( | |||||
max_size=max_size, | |||||
min_freq=min_freq, | |||||
padding=padding, | |||||
unknown=unknown | |||||
) | |||||
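The option object can be unpacked straight into the `Vocabulary` constructor, which is how `SSTLoader.process` below uses it. A short sketch:

    from fastNLP import Vocabulary
    from fastNLP.core.vocabulary import VocabularyOption

    vocab_op = VocabularyOption(max_size=30000, min_freq=2)
    vocab = Vocabulary(**vocab_op)           # Option is a dict, so ** unpacking works
    vocab.add_word_lst(['hello', 'world', 'hello'])
    vocab.build_vocab()
    print(len(vocab))                        # pad/unk plus the words that meet min_freq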
def _check_build_vocab(func): | def _check_build_vocab(func): | ||||
@@ -1,10 +1,14 @@ | |||||
__all__ = [ | __all__ = [ | ||||
"BaseLoader" | |||||
"BaseLoader", | |||||
'DataInfo', | |||||
'DataSetLoader', | |||||
] | ] | ||||
import _pickle as pickle | import _pickle as pickle | ||||
import os | import os | ||||
from typing import Union, Dict | |||||
import os | |||||
from ..core.dataset import DataSet | |||||
class BaseLoader(object): | class BaseLoader(object): | ||||
""" | """ | ||||
@@ -51,24 +55,161 @@ class BaseLoader(object): | |||||
return obj | return obj | ||||
class DataLoaderRegister: | |||||
_readers = {} | |||||
@classmethod | |||||
def set_reader(cls, reader_cls, read_fn_name): | |||||
# def wrapper(reader_cls): | |||||
if read_fn_name in cls._readers: | |||||
raise KeyError( | |||||
'duplicate reader: {} and {} for read_func: {}'.format(cls._readers[read_fn_name], reader_cls, | |||||
read_fn_name)) | |||||
if hasattr(reader_cls, 'load'): | |||||
cls._readers[read_fn_name] = reader_cls().load | |||||
return reader_cls | |||||
@classmethod | |||||
def get_reader(cls, read_fn_name): | |||||
if read_fn_name in cls._readers: | |||||
return cls._readers[read_fn_name] | |||||
raise AttributeError('no read function: {}'.format(read_fn_name)) | |||||
# TODO 这个类使用在何处? | |||||
def _download_from_url(url, path): | |||||
try: | |||||
from tqdm.auto import tqdm | |||||
except: | |||||
from ..core.utils import _pseudo_tqdm as tqdm | |||||
import requests | |||||
"""Download file""" | |||||
r = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}, stream=True) | |||||
chunk_size = 16 * 1024 | |||||
total_size = int(r.headers.get('Content-length', 0)) | |||||
with open(path, "wb") as file, \ | |||||
tqdm(total=total_size, unit='B', unit_scale=1, desc=path.split('/')[-1]) as t: | |||||
for chunk in r.iter_content(chunk_size): | |||||
if chunk: | |||||
file.write(chunk) | |||||
t.update(len(chunk)) | |||||
def _uncompress(src, dst): | |||||
import zipfile | |||||
import gzip | |||||
import tarfile | |||||
import os | |||||
def unzip(src, dst): | |||||
with zipfile.ZipFile(src, 'r') as f: | |||||
f.extractall(dst) | |||||
def ungz(src, dst): | |||||
with gzip.open(src, 'rb') as f, open(dst, 'wb') as uf: | |||||
length = 16 * 1024 # 16KB | |||||
buf = f.read(length) | |||||
while buf: | |||||
uf.write(buf) | |||||
buf = f.read(length) | |||||
def untar(src, dst): | |||||
with tarfile.open(src, 'r:gz') as f: | |||||
f.extractall(dst) | |||||
fn, ext = os.path.splitext(src) | |||||
_, ext_2 = os.path.splitext(fn) | |||||
if ext == '.zip': | |||||
unzip(src, dst) | |||||
elif ext == '.gz' and ext_2 != '.tar': | |||||
ungz(src, dst) | |||||
elif (ext == '.gz' and ext_2 == '.tar') or ext_2 == '.tgz': | |||||
untar(src, dst) | |||||
else: | |||||
raise ValueError('unsupported file {}'.format(src)) | |||||
class DataInfo: | |||||
""" | |||||
经过处理的数据信息,包括一系列数据集(比如:分开的训练集、验证集和测试集)及它们所用的词表和词嵌入。 | |||||
:param vocabs: 从名称(字符串)到 :class:`~fastNLP.Vocabulary` 类型的dict | |||||
:param embeddings: 从名称(字符串)到一系列 embedding 的dict,参考 :class:`~fastNLP.io.EmbedLoader` | |||||
:param datasets: 从名称(字符串)到 :class:`~fastNLP.DataSet` 类型的dict | |||||
""" | |||||
def __init__(self, vocabs: dict = None, embeddings: dict = None, datasets: dict = None): | |||||
self.vocabs = vocabs or {} | |||||
self.embeddings = embeddings or {} | |||||
self.datasets = datasets or {} | |||||
class DataSetLoader: | |||||
""" | |||||
别名::class:`fastNLP.io.DataSetLoader` :class:`fastNLP.io.dataset_loader.DataSetLoader` | |||||
定义了各种 DataSetLoader 所需的API 接口,开发者应该继承它实现各种的 DataSetLoader。 | |||||
开发者至少应该编写如下内容: | |||||
- _load 函数:从一个数据文件中读取数据到一个 :class:`~fastNLP.DataSet` | |||||
- load 函数(可以使用基类的方法):从一个或多个数据文件中读取数据到一个或多个 :class:`~fastNLP.DataSet` | |||||
- process 函数:一个或多个从数据文件中读取数据,并处理成可以训练的一个或多个 :class:`~fastNLP.DataSet` | |||||
**process 函数中可以 调用load 函数或 _load 函数** | |||||
""" | |||||
URL = '' | |||||
DATA_DIR = '' | |||||
ROOT_DIR = '.fastnlp/datasets/' | |||||
UNCOMPRESS = True | |||||
def _download(self, url: str, pdir: str, uncompress=True) -> str: | |||||
""" | |||||
从 ``url`` 下载数据到 ``path``, 如果 ``uncompress`` 为 ``True`` ,自动解压。 | |||||
:param url: 下载的网站 | |||||
:param pdir: 下载到的目录 | |||||
:param uncompress: 是否自动解压缩 | |||||
:return: 数据的存放路径 | |||||
""" | |||||
fn = os.path.basename(url) | |||||
path = os.path.join(pdir, fn) | |||||
"""check data exists""" | |||||
if not os.path.exists(path): | |||||
os.makedirs(pdir, exist_ok=True) | |||||
_download_from_url(url, path) | |||||
if uncompress: | |||||
dst = os.path.join(pdir, 'data') | |||||
if not os.path.exists(dst): | |||||
_uncompress(path, dst) | |||||
return dst | |||||
return path | |||||
def download(self): | |||||
return self._download( | |||||
self.URL, | |||||
os.path.join(self.ROOT_DIR, self.DATA_DIR), | |||||
uncompress=self.UNCOMPRESS) | |||||
def load(self, paths: Union[str, Dict[str, str]]) -> Union[DataSet, Dict[str, DataSet]]: | |||||
""" | |||||
从指定一个或多个路径中的文件中读取数据,返回一个或多个数据集 :class:`~fastNLP.DataSet` 。 | |||||
如果处理多个路径,传入的 dict 中的 key 与返回的 dict 中的 key 保存一致。 | |||||
:param Union[str, Dict[str, str]] paths: 文件路径 | |||||
:return: :class:`~fastNLP.DataSet` 类的对象或存储多个 :class:`~fastNLP.DataSet` 的字典 | |||||
""" | |||||
if isinstance(paths, str): | |||||
return self._load(paths) | |||||
return {name: self._load(path) for name, path in paths.items()} | |||||
def _load(self, path: str) -> DataSet: | |||||
"""从指定路径的文件中读取数据,返回 :class:`~fastNLP.DataSet` 类型的对象 | |||||
:param str path: 文件路径 | |||||
:return: 一个 :class:`~fastNLP.DataSet` 类型的对象 | |||||
""" | |||||
raise NotImplementedError | |||||
def process(self, paths: Union[str, Dict[str, str]], **options) -> DataInfo: | |||||
""" | |||||
对于特定的任务和数据集,读取并处理数据,返回处理DataInfo类对象或字典。 | |||||
从指定一个或多个路径中的文件中读取数据,DataInfo对象中可以包含一个或多个数据集 。 | |||||
如果处理多个路径,传入的 dict 的 key 与返回DataInfo中的 dict 中的 key 保存一致。 | |||||
返回的 :class:`DataInfo` 对象有如下属性: | |||||
- vocabs: 由从数据集中获取的词表组成的字典,每个词表 | |||||
- embeddings: (可选) 数据集对应的词嵌入 | |||||
- datasets: 一个dict,包含一系列 :class:`~fastNLP.DataSet` 类型的对象。其中 field 的命名参考 :mod:`~fastNLP.core.const` | |||||
:param paths: 原始数据读取的路径 | |||||
:param options: 根据不同的任务和数据集,设计自己的参数 | |||||
:return: 返回一个 DataInfo | |||||
""" | |||||
raise NotImplementedError |
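A minimal subclass sketch showing the intended contract: `_load` reads one file, the inherited `load` fans out over a dict of paths, and `process` returns a `DataInfo`. The loader name and the tab-separated file format are hypothetical:

    from fastNLP import DataSet, Instance, Vocabulary
    from fastNLP.io.base_loader import DataSetLoader, DataInfo

    class MyLoader(DataSetLoader):
        def _load(self, path: str) -> DataSet:
            ds = DataSet()
            with open(path, 'r', encoding='utf-8') as f:
                for line in f:
                    text, label = line.rstrip('\n').split('\t')   # assumed "text<TAB>label" lines
                    ds.append(Instance(words=text.split(), target=label))
            return ds

        def process(self, paths, **options) -> DataInfo:
            # assumes paths is a dict of name -> file path
            info = DataInfo(datasets=self.load(paths))
            vocab = Vocabulary()
            vocab.from_dataset(*info.datasets.values(), field_name='words')
            vocab.index_dataset(*info.datasets.values(), field_name='words', new_field_name='words')
            info.vocabs['words'] = vocab
            return info

    # info = MyLoader().process({'train': 'train.tsv', 'dev': 'dev.tsv'})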
@@ -0,0 +1,95 @@ | |||||
from typing import Iterable | |||||
from nltk import Tree | |||||
from ..base_loader import DataInfo, DataSetLoader | |||||
from ...core.vocabulary import VocabularyOption, Vocabulary | |||||
from ...core.dataset import DataSet | |||||
from ...core.instance import Instance | |||||
from ..embed_loader import EmbeddingOption, EmbedLoader | |||||
class SSTLoader(DataSetLoader): | |||||
""" | |||||
别名::class:`fastNLP.io.SSTLoader` :class:`fastNLP.io.dataset_loader.SSTLoader` | |||||
读取SST数据集, DataSet包含fields:: | |||||
words: list(str) 需要分类的文本 | |||||
target: str 文本的标签 | |||||
数据来源: https://nlp.stanford.edu/sentiment/trainDevTestTrees_PTB.zip | |||||
:param subtree: 是否将数据展开为子树,扩充数据量。Default: ``False`` | |||||
:param fine_grained: 是否使用SST-5标准,若 ``False`` ,使用SST-2。Default: ``False`` | |||||
""" | |||||
URL = 'https://nlp.stanford.edu/sentiment/trainDevTestTrees_PTB.zip' | |||||
DATA_DIR = 'sst/' | |||||
def __init__(self, subtree=False, fine_grained=False): | |||||
self.subtree = subtree | |||||
tag_v = {'0': 'very negative', '1': 'negative', '2': 'neutral', | |||||
'3': 'positive', '4': 'very positive'} | |||||
if not fine_grained: | |||||
tag_v['0'] = tag_v['1'] | |||||
tag_v['4'] = tag_v['3'] | |||||
self.tag_v = tag_v | |||||
def _load(self, path): | |||||
""" | |||||
:param str path: 存储数据的路径 | |||||
:return: 一个 :class:`~fastNLP.DataSet` 类型的对象 | |||||
""" | |||||
with open(path, 'r', encoding='utf-8') as f: | |||||
datas = [] | |||||
for l in f: | |||||
datas.extend([(s, self.tag_v[t]) | |||||
for s, t in self._get_one(l, self.subtree)]) | |||||
ds = DataSet() | |||||
for words, tag in datas: | |||||
ds.append(Instance(words=words, target=tag)) | |||||
return ds | |||||
@staticmethod | |||||
def _get_one(data, subtree): | |||||
tree = Tree.fromstring(data) | |||||
if subtree: | |||||
return [(t.leaves(), t.label()) for t in tree.subtrees()] | |||||
return [(tree.leaves(), tree.label())] | |||||
def process(self, | |||||
paths, | |||||
train_ds: Iterable[str] = None, | |||||
src_vocab_op: VocabularyOption = None, | |||||
tgt_vocab_op: VocabularyOption = None, | |||||
src_embed_op: EmbeddingOption = None): | |||||
input_name, target_name = 'words', 'target' | |||||
src_vocab = Vocabulary() if src_vocab_op is None else Vocabulary(**src_vocab_op) | |||||
tgt_vocab = Vocabulary(unknown=None, padding=None) \ | |||||
if tgt_vocab_op is None else Vocabulary(**tgt_vocab_op) | |||||
info = DataInfo(datasets=self.load(paths)) | |||||
_train_ds = [info.datasets[name] | |||||
for name in train_ds] if train_ds else info.datasets.values() | |||||
src_vocab.from_dataset(*_train_ds, field_name=input_name) | |||||
tgt_vocab.from_dataset(*_train_ds, field_name=target_name) | |||||
src_vocab.index_dataset( | |||||
*info.datasets.values(), | |||||
field_name=input_name, new_field_name=input_name) | |||||
tgt_vocab.index_dataset( | |||||
*info.datasets.values(), | |||||
field_name=target_name, new_field_name=target_name) | |||||
info.vocabs = { | |||||
input_name: src_vocab, | |||||
target_name: tgt_vocab | |||||
} | |||||
if src_embed_op is not None: | |||||
src_embed_op.vocab = src_vocab | |||||
init_emb = EmbedLoader.load_with_vocab(**src_embed_op) | |||||
info.embeddings[input_name] = init_emb | |||||
return info | |||||
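A usage sketch of the loader above (the file paths are hypothetical): `process` loads every split, builds the source/target vocabularies from the splits named in `train_ds` (or from all splits when it is left as `None`), and indexes the `words`/`target` fields in place.

```python
loader = SSTLoader(fine_grained=True)
info = loader.process({'train': 'trees/train.txt', 'dev': 'trees/dev.txt', 'test': 'trees/test.txt'},
                      train_ds=['train'])
print(len(info.vocabs['words']), len(info.vocabs['target']))   # vocabulary sizes
print(info.datasets['train'][0])                               # indexed words + target
```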
@@ -13,8 +13,6 @@ dataset_loader模块实现了许多 DataSetLoader, 用于读取不同格式的 | |||||
为 fastNLP 提供 DataSetLoader 的开发者请参考 :class:`~fastNLP.io.DataSetLoader` 的介绍。 | 为 fastNLP 提供 DataSetLoader 的开发者请参考 :class:`~fastNLP.io.DataSetLoader` 的介绍。 | ||||
""" | """ | ||||
__all__ = [ | __all__ = [ | ||||
'DataInfo', | |||||
'DataSetLoader', | |||||
'CSVLoader', | 'CSVLoader', | ||||
'JsonLoader', | 'JsonLoader', | ||||
'ConllLoader', | 'ConllLoader', | ||||
@@ -24,157 +22,13 @@ __all__ = [ | |||||
'Conll2003Loader', | 'Conll2003Loader', | ||||
] | ] | ||||
from nltk.tree import Tree | |||||
from nltk import Tree | |||||
from ..core.dataset import DataSet | from ..core.dataset import DataSet | ||||
from ..core.instance import Instance | from ..core.instance import Instance | ||||
from .file_reader import _read_csv, _read_json, _read_conll | from .file_reader import _read_csv, _read_json, _read_conll | ||||
from typing import Union, Dict | |||||
import os | |||||
def _download_from_url(url, path): | |||||
try: | |||||
from tqdm.auto import tqdm | |||||
except: | |||||
from ..core.utils import _pseudo_tqdm as tqdm | |||||
import requests | |||||
"""Download file""" | |||||
r = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}, stream=True) | |||||
chunk_size = 16 * 1024 | |||||
total_size = int(r.headers.get('Content-length', 0)) | |||||
with open(path, "wb") as file, \ | |||||
tqdm(total=total_size, unit='B', unit_scale=1, desc=path.split('/')[-1]) as t: | |||||
for chunk in r.iter_content(chunk_size): | |||||
if chunk: | |||||
file.write(chunk) | |||||
t.update(len(chunk)) | |||||
return | |||||
def _uncompress(src, dst): | |||||
import zipfile | |||||
import gzip | |||||
import tarfile | |||||
import os | |||||
def unzip(src, dst): | |||||
with zipfile.ZipFile(src, 'r') as f: | |||||
f.extractall(dst) | |||||
def ungz(src, dst): | |||||
with gzip.open(src, 'rb') as f, open(dst, 'wb') as uf: | |||||
length = 16 * 1024 # 16KB | |||||
buf = f.read(length) | |||||
while buf: | |||||
uf.write(buf) | |||||
buf = f.read(length) | |||||
def untar(src, dst): | |||||
with tarfile.open(src, 'r:gz') as f: | |||||
f.extractall(dst) | |||||
fn, ext = os.path.splitext(src) | |||||
_, ext_2 = os.path.splitext(fn) | |||||
if ext == '.zip': | |||||
unzip(src, dst) | |||||
elif ext == '.gz' and ext_2 != '.tar': | |||||
ungz(src, dst) | |||||
elif (ext == '.gz' and ext_2 == '.tar') or ext_2 == '.tgz': | |||||
untar(src, dst) | |||||
else: | |||||
raise ValueError('unsupported file {}'.format(src)) | |||||
class DataInfo: | |||||
""" | |||||
经过处理的数据信息,包括一系列数据集(比如:分开的训练集、验证集和测试集)及它们所用的词表和词嵌入。 | |||||
:param vocabs: 从名称(字符串)到 :class:`~fastNLP.Vocabulary` 类型的dict | |||||
:param embeddings: 从名称(字符串)到一系列 embedding 的dict,参考 :class:`~fastNLP.io.EmbedLoader` | |||||
:param datasets: 从名称(字符串)到 :class:`~fastNLP.DataSet` 类型的dict | |||||
""" | |||||
def __init__(self, vocabs: dict = None, embeddings: dict = None, datasets: dict = None): | |||||
self.vocabs = vocabs or {} | |||||
self.embeddings = embeddings or {} | |||||
self.datasets = datasets or {} | |||||
class DataSetLoader: | |||||
""" | |||||
别名::class:`fastNLP.io.DataSetLoader` :class:`fastNLP.io.dataset_loader.DataSetLoader` | |||||
定义了各种 DataSetLoader (针对特定数据上的特定任务) 所需的API 接口,开发者应该继承它实现各种的 DataSetLoader。 | |||||
开发者至少应该编写如下内容: | |||||
- _load 函数:从一个数据文件中读取数据到一个 :class:`~fastNLP.DataSet` | |||||
- load 函数(可以使用基类的方法):从一个或多个数据文件中读取数据到一个或多个 :class:`~fastNLP.DataSet` | |||||
- process 函数:一个或多个从数据文件中读取数据,并处理成可以训练的一个或多个 :class:`~fastNLP.DataSet` | |||||
**process 函数中可以 调用load 函数或 _load 函数** | |||||
""" | |||||
def _download(self, url: str, path: str, uncompress=True) -> str: | |||||
""" | |||||
从 ``url`` 下载数据到 ``path``, 如果 ``uncompress`` 为 ``True`` ,自动解压。 | |||||
:param url: 下载的网站 | |||||
:param path: 下载到的目录 | |||||
:param uncompress: 是否自动解压缩 | |||||
:return: 数据的存放路径 | |||||
""" | |||||
pdir = os.path.dirname(path) | |||||
os.makedirs(pdir, exist_ok=True) | |||||
_download_from_url(url, path) | |||||
if uncompress: | |||||
dst = os.path.join(pdir, 'data') | |||||
_uncompress(path, dst) | |||||
return dst | |||||
return path | |||||
def load(self, paths: Union[str, Dict[str, str]]) -> Union[DataSet, Dict[str, DataSet]]: | |||||
""" | |||||
从指定一个或多个路径中的文件中读取数据,返回一个或多个数据集 :class:`~fastNLP.DataSet` 。 | |||||
如果处理多个路径,传入的 dict 中的 key 与返回的 dict 中的 key 保存一致。 | |||||
:param Union[str, Dict[str, str]] paths: 文件路径 | |||||
:return: :class:`~fastNLP.DataSet` 类的对象或存储多个 :class:`~fastNLP.DataSet` 的字典 | |||||
""" | |||||
if isinstance(paths, str): | |||||
return self._load(paths) | |||||
return {name: self._load(path) for name, path in paths.items()} | |||||
def _load(self, path: str) -> DataSet: | |||||
"""从指定路径的文件中读取数据,返回 :class:`~fastNLP.DataSet` 类型的对象 | |||||
:param str path: 文件路径 | |||||
:return: 一个 :class:`~fastNLP.DataSet` 类型的对象 | |||||
""" | |||||
raise NotImplementedError | |||||
def process(self, paths: Union[str, Dict[str, str]], **options) -> DataInfo: | |||||
""" | |||||
对于特定的任务和数据集,读取并处理数据,返回处理DataInfo类对象或字典。 | |||||
从指定一个或多个路径中的文件中读取数据,DataInfo对象中可以包含一个或多个数据集 。 | |||||
如果处理多个路径,传入的 dict 的 key 与返回DataInfo中的 dict 中的 key 保存一致。 | |||||
返回的 :class:`DataInfo` 对象有如下属性: | |||||
- vocabs: 由从数据集中获取的词表组成的字典,每个词表 | |||||
- embeddings: (可选) 数据集对应的词嵌入 | |||||
- datasets: 一个dict,包含一系列 :class:`~fastNLP.DataSet` 类型的对象。其中 field 的命名参考 :mod:`~fastNLP.core.const` | |||||
:param paths: 原始数据读取的路径 | |||||
:param options: 根据不同的任务和数据集,设计自己的参数 | |||||
:return: 返回一个 DataInfo | |||||
""" | |||||
raise NotImplementedError | |||||
from .base_loader import DataSetLoader | |||||
from .data_loader.sst import SSTLoader | |||||
from ..core.const import Const | |||||
class PeopleDailyCorpusLoader(DataSetLoader): | class PeopleDailyCorpusLoader(DataSetLoader): | ||||
@@ -183,12 +37,12 @@ class PeopleDailyCorpusLoader(DataSetLoader): | |||||
读取人民日报数据集 | 读取人民日报数据集 | ||||
""" | """ | ||||
def __init__(self, pos=True, ner=True): | def __init__(self, pos=True, ner=True): | ||||
super(PeopleDailyCorpusLoader, self).__init__() | super(PeopleDailyCorpusLoader, self).__init__() | ||||
self.pos = pos | self.pos = pos | ||||
self.ner = ner | self.ner = ner | ||||
def _load(self, data_path): | def _load(self, data_path): | ||||
with open(data_path, "r", encoding="utf-8") as f: | with open(data_path, "r", encoding="utf-8") as f: | ||||
sents = f.readlines() | sents = f.readlines() | ||||
@@ -233,7 +87,7 @@ class PeopleDailyCorpusLoader(DataSetLoader): | |||||
example.append(sent_ner) | example.append(sent_ner) | ||||
examples.append(example) | examples.append(example) | ||||
return self.convert(examples) | return self.convert(examples) | ||||
def convert(self, data): | def convert(self, data): | ||||
""" | """ | ||||
@@ -284,7 +138,7 @@ class ConllLoader(DataSetLoader): | |||||
:param indexes: 需要保留的数据列下标,从0开始。若为 ``None`` ,则所有列都保留。Default: ``None`` | :param indexes: 需要保留的数据列下标,从0开始。若为 ``None`` ,则所有列都保留。Default: ``None`` | ||||
:param dropna: 是否忽略非法数据,若 ``False`` ,遇到非法数据时抛出 ``ValueError`` 。Default: ``False`` | :param dropna: 是否忽略非法数据,若 ``False`` ,遇到非法数据时抛出 ``ValueError`` 。Default: ``False`` | ||||
""" | """ | ||||
def __init__(self, headers, indexes=None, dropna=False): | def __init__(self, headers, indexes=None, dropna=False): | ||||
super(ConllLoader, self).__init__() | super(ConllLoader, self).__init__() | ||||
if not isinstance(headers, (list, tuple)): | if not isinstance(headers, (list, tuple)): | ||||
@@ -298,7 +152,7 @@ class ConllLoader(DataSetLoader): | |||||
if len(indexes) != len(headers): | if len(indexes) != len(headers): | ||||
raise ValueError | raise ValueError | ||||
self.indexes = indexes | self.indexes = indexes | ||||
def _load(self, path): | def _load(self, path): | ||||
ds = DataSet() | ds = DataSet() | ||||
for idx, data in _read_conll(path, indexes=self.indexes, dropna=self.dropna): | for idx, data in _read_conll(path, indexes=self.indexes, dropna=self.dropna): | ||||
@@ -316,7 +170,7 @@ class Conll2003Loader(ConllLoader): | |||||
关于数据集的更多信息,参考: | 关于数据集的更多信息,参考: | ||||
https://sites.google.com/site/ermasoftware/getting-started/ne-tagging-conll2003-data | https://sites.google.com/site/ermasoftware/getting-started/ne-tagging-conll2003-data | ||||
""" | """ | ||||
def __init__(self): | def __init__(self): | ||||
headers = [ | headers = [ | ||||
'tokens', 'pos', 'chunks', 'ner', | 'tokens', 'pos', 'chunks', 'ner', | ||||
@@ -354,56 +208,6 @@ def _cut_long_sentence(sent, max_sample_length=200): | |||||
return cutted_sentence | return cutted_sentence | ||||
class SSTLoader(DataSetLoader): | |||||
""" | |||||
别名::class:`fastNLP.io.SSTLoader` :class:`fastNLP.io.dataset_loader.SSTLoader` | |||||
读取SST数据集, DataSet包含fields:: | |||||
words: list(str) 需要分类的文本 | |||||
target: str 文本的标签 | |||||
数据来源: https://nlp.stanford.edu/sentiment/trainDevTestTrees_PTB.zip | |||||
:param subtree: 是否将数据展开为子树,扩充数据量. Default: ``False`` | |||||
:param fine_grained: 是否使用SST-5标准,若 ``False`` , 使用SST-2。Default: ``False`` | |||||
""" | |||||
def __init__(self, subtree=False, fine_grained=False): | |||||
self.subtree = subtree | |||||
tag_v = {'0': 'very negative', '1': 'negative', '2': 'neutral', | |||||
'3': 'positive', '4': 'very positive'} | |||||
if not fine_grained: | |||||
tag_v['0'] = tag_v['1'] | |||||
tag_v['4'] = tag_v['3'] | |||||
self.tag_v = tag_v | |||||
def _load(self, path): | |||||
""" | |||||
:param str path: 存储数据的路径 | |||||
:return: 一个 :class:`~fastNLP.DataSet` 类型的对象 | |||||
""" | |||||
datalist = [] | |||||
with open(path, 'r', encoding='utf-8') as f: | |||||
datas = [] | |||||
for l in f: | |||||
datas.extend([(s, self.tag_v[t]) | |||||
for s, t in self._get_one(l, self.subtree)]) | |||||
ds = DataSet() | |||||
for words, tag in datas: | |||||
ds.append(Instance(words=words, target=tag)) | |||||
return ds | |||||
@staticmethod | |||||
def _get_one(data, subtree): | |||||
tree = Tree.fromstring(data) | |||||
if subtree: | |||||
return [(t.leaves(), t.label()) for t in tree.subtrees()] | |||||
return [(tree.leaves(), tree.label())] | |||||
class JsonLoader(DataSetLoader): | class JsonLoader(DataSetLoader): | ||||
""" | """ | ||||
别名::class:`fastNLP.io.JsonLoader` :class:`fastNLP.io.dataset_loader.JsonLoader` | 别名::class:`fastNLP.io.JsonLoader` :class:`fastNLP.io.dataset_loader.JsonLoader` | ||||
@@ -417,7 +221,7 @@ class JsonLoader(DataSetLoader): | |||||
:param bool dropna: 是否忽略非法数据,若 ``True`` 则忽略,若 ``False`` ,在遇到非法数据时,抛出 ``ValueError`` . | :param bool dropna: 是否忽略非法数据,若 ``True`` 则忽略,若 ``False`` ,在遇到非法数据时,抛出 ``ValueError`` . | ||||
Default: ``False`` | Default: ``False`` | ||||
""" | """ | ||||
def __init__(self, fields=None, dropna=False): | def __init__(self, fields=None, dropna=False): | ||||
super(JsonLoader, self).__init__() | super(JsonLoader, self).__init__() | ||||
self.dropna = dropna | self.dropna = dropna | ||||
@@ -428,7 +232,7 @@ class JsonLoader(DataSetLoader): | |||||
for k, v in fields.items(): | for k, v in fields.items(): | ||||
self.fields[k] = k if v is None else v | self.fields[k] = k if v is None else v | ||||
self.fields_list = list(self.fields.keys()) | self.fields_list = list(self.fields.keys()) | ||||
def _load(self, path): | def _load(self, path): | ||||
ds = DataSet() | ds = DataSet() | ||||
for idx, d in _read_json(path, fields=self.fields_list, dropna=self.dropna): | for idx, d in _read_json(path, fields=self.fields_list, dropna=self.dropna): | ||||
@@ -452,27 +256,27 @@ class SNLILoader(JsonLoader): | |||||
数据来源: https://nlp.stanford.edu/projects/snli/snli_1.0.zip | 数据来源: https://nlp.stanford.edu/projects/snli/snli_1.0.zip | ||||
""" | """ | ||||
def __init__(self): | def __init__(self): | ||||
fields = { | fields = { | ||||
'sentence1_parse': 'words1', | |||||
'sentence2_parse': 'words2', | |||||
'gold_label': 'target', | |||||
'sentence1_parse': Const.INPUTS(0), | |||||
'sentence2_parse': Const.INPUTS(1), | |||||
'gold_label': Const.TARGET, | |||||
} | } | ||||
super(SNLILoader, self).__init__(fields=fields) | super(SNLILoader, self).__init__(fields=fields) | ||||
def _load(self, path): | def _load(self, path): | ||||
ds = super(SNLILoader, self)._load(path) | ds = super(SNLILoader, self)._load(path) | ||||
def parse_tree(x): | def parse_tree(x): | ||||
t = Tree.fromstring(x) | t = Tree.fromstring(x) | ||||
return t.leaves() | return t.leaves() | ||||
ds.apply(lambda ins: parse_tree( | ds.apply(lambda ins: parse_tree( | ||||
ins['words1']), new_field_name='words1') | |||||
ins[Const.INPUTS(0)]), new_field_name=Const.INPUTS(0)) | |||||
ds.apply(lambda ins: parse_tree( | ds.apply(lambda ins: parse_tree( | ||||
ins['words2']), new_field_name='words2') | |||||
ds.drop(lambda x: x['target'] == '-') | |||||
ins[Const.INPUTS(1)]), new_field_name=Const.INPUTS(1)) | |||||
ds.drop(lambda x: x[Const.TARGET] == '-') | |||||
return ds | return ds | ||||
@@ -488,12 +292,12 @@ class CSVLoader(DataSetLoader): | |||||
:param bool dropna: 是否忽略非法数据,若 ``True`` 则忽略,若 ``False`` ,在遇到非法数据时,抛出 ``ValueError`` . | :param bool dropna: 是否忽略非法数据,若 ``True`` 则忽略,若 ``False`` ,在遇到非法数据时,抛出 ``ValueError`` . | ||||
Default: ``False`` | Default: ``False`` | ||||
""" | """ | ||||
def __init__(self, headers=None, sep=",", dropna=False): | def __init__(self, headers=None, sep=",", dropna=False): | ||||
self.headers = headers | self.headers = headers | ||||
self.sep = sep | self.sep = sep | ||||
self.dropna = dropna | self.dropna = dropna | ||||
def _load(self, path): | def _load(self, path): | ||||
ds = DataSet() | ds = DataSet() | ||||
for idx, data in _read_csv(path, headers=self.headers, | for idx, data in _read_csv(path, headers=self.headers, | ||||
@@ -508,7 +312,7 @@ def _add_seg_tag(data): | |||||
:param data: list of ([word], [pos], [heads], [head_tags]) | :param data: list of ([word], [pos], [heads], [head_tags]) | ||||
:return: list of ([word], [pos]) | :return: list of ([word], [pos]) | ||||
""" | """ | ||||
_processed = [] | _processed = [] | ||||
for word_list, pos_list, _, _ in data: | for word_list, pos_list, _, _ in data: | ||||
new_sample = [] | new_sample = [] | ||||
@@ -1,5 +1,6 @@ | |||||
__all__ = [ | __all__ = [ | ||||
"EmbedLoader" | |||||
"EmbedLoader", | |||||
"EmbeddingOption", | |||||
] | ] | ||||
import os | import os | ||||
@@ -9,8 +10,22 @@ import numpy as np | |||||
from ..core.vocabulary import Vocabulary | from ..core.vocabulary import Vocabulary | ||||
from .base_loader import BaseLoader | from .base_loader import BaseLoader | ||||
from ..core.utils import Option | |||||
class EmbeddingOption(Option): | |||||
def __init__(self, | |||||
embed_filepath=None, | |||||
dtype=np.float32, | |||||
normalize=True, | |||||
error='ignore'): | |||||
super().__init__( | |||||
embed_filepath=embed_filepath, | |||||
dtype=dtype, | |||||
normalize=normalize, | |||||
error=error | |||||
) | |||||
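A sketch of how an `EmbeddingOption` is meant to be consumed, mirroring the call in `SSTLoader.process` above; the embedding path is hypothetical and the toy vocabulary only illustrates the flow.

```python
vocab = Vocabulary()
vocab.add_word_lst(['the', 'movie', 'was', 'great'])   # toy vocabulary

opt = EmbeddingOption(embed_filepath='/path/to/pretrained_vectors.txt')  # hypothetical path
opt.vocab = vocab                                   # Option supports attribute-style assignment
embedding = EmbedLoader.load_with_vocab(**opt)      # ndarray of shape [len(vocab), dim]
```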
class EmbedLoader(BaseLoader): | class EmbedLoader(BaseLoader): | ||||
""" | """ | ||||
别名::class:`fastNLP.io.EmbedLoader` :class:`fastNLP.io.embed_loader.EmbedLoader` | 别名::class:`fastNLP.io.EmbedLoader` :class:`fastNLP.io.embed_loader.EmbedLoader` | ||||
@@ -92,9 +107,9 @@ class EmbedLoader(BaseLoader): | |||||
:param bool normalize: 是否将每个vector归一化到norm为1 | :param bool normalize: 是否将每个vector归一化到norm为1 | ||||
:param str error: `ignore` , `strict` ; 如果 `ignore` ,错误将自动跳过; 如果 `strict` , 错误将抛出。这里主要可能出错的地 | :param str error: `ignore` , `strict` ; 如果 `ignore` ,错误将自动跳过; 如果 `strict` , 错误将抛出。这里主要可能出错的地 | ||||
方在于词表有空行或者词表出现了维度不一致。 | 方在于词表有空行或者词表出现了维度不一致。 | ||||
:return numpy.ndarray: shape为 [len(vocab), dimension], dimension由pretrain的embedding决定。 | |||||
:return numpy.ndarray: Vocabulary Embedding的shape是[词表大小+x, 词表维度], "词表大小+x"是由于最终的大小还取决与 | |||||
:return (numpy.ndarray, Vocabulary): Embedding的shape是[词表大小+x, 词表维度], "词表大小+x"是由于最终的大小还取决与 | |||||
是否使用padding, 以及unknown有没有在词表中找到对应的词。 Vocabulary中的词的顺序与Embedding的顺序是一一对应的。 | 是否使用padding, 以及unknown有没有在词表中找到对应的词。 Vocabulary中的词的顺序与Embedding的顺序是一一对应的。 | ||||
""" | """ | ||||
vocab = Vocabulary(padding=padding, unknown=unknown) | vocab = Vocabulary(padding=padding, unknown=unknown) | ||||
vec_dict = {} | vec_dict = {} | ||||
@@ -10,6 +10,35 @@ from ..core.const import Const | |||||
from ..modules.encoder import BertModel | from ..modules.encoder import BertModel | ||||
class BertConfig: | |||||
def __init__( | |||||
self, | |||||
vocab_size=30522, | |||||
hidden_size=768, | |||||
num_hidden_layers=12, | |||||
num_attention_heads=12, | |||||
intermediate_size=3072, | |||||
hidden_act="gelu", | |||||
hidden_dropout_prob=0.1, | |||||
attention_probs_dropout_prob=0.1, | |||||
max_position_embeddings=512, | |||||
type_vocab_size=2, | |||||
initializer_range=0.02 | |||||
): | |||||
self.vocab_size = vocab_size | |||||
self.hidden_size = hidden_size | |||||
self.num_hidden_layers = num_hidden_layers | |||||
self.num_attention_heads = num_attention_heads | |||||
self.intermediate_size = intermediate_size | |||||
self.hidden_act = hidden_act | |||||
self.hidden_dropout_prob = hidden_dropout_prob | |||||
self.attention_probs_dropout_prob = attention_probs_dropout_prob | |||||
self.max_position_embeddings = max_position_embeddings | |||||
self.type_vocab_size = type_vocab_size | |||||
self.initializer_range = initializer_range | |||||
class BertForSequenceClassification(BaseModel): | class BertForSequenceClassification(BaseModel): | ||||
"""BERT model for classification. | """BERT model for classification. | ||||
This module is composed of the BERT model with a linear layer on top of | This module is composed of the BERT model with a linear layer on top of | ||||
@@ -44,14 +73,19 @@ class BertForSequenceClassification(BaseModel): | |||||
config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768, | config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768, | ||||
num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) | num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) | ||||
num_labels = 2 | num_labels = 2 | ||||
model = BertForSequenceClassification(config, num_labels) | |||||
model = BertForSequenceClassification(num_labels, config) | |||||
logits = model(input_ids, token_type_ids, input_mask) | logits = model(input_ids, token_type_ids, input_mask) | ||||
``` | ``` | ||||
""" | """ | ||||
def __init__(self, config, num_labels, bert_dir): | |||||
def __init__(self, num_labels, config=None, bert_dir=None): | |||||
super(BertForSequenceClassification, self).__init__() | super(BertForSequenceClassification, self).__init__() | ||||
self.num_labels = num_labels | self.num_labels = num_labels | ||||
self.bert = BertModel.from_pretrained(bert_dir) | |||||
if bert_dir is not None: | |||||
self.bert = BertModel.from_pretrained(bert_dir) | |||||
else: | |||||
if config is None: | |||||
config = BertConfig() | |||||
self.bert = BertModel(**config.__dict__) | |||||
self.dropout = nn.Dropout(config.hidden_dropout_prob) | self.dropout = nn.Dropout(config.hidden_dropout_prob) | ||||
self.classifier = nn.Linear(config.hidden_size, num_labels) | self.classifier = nn.Linear(config.hidden_size, num_labels) | ||||
@@ -106,14 +140,19 @@ class BertForMultipleChoice(BaseModel): | |||||
config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768, | config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768, | ||||
num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) | num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) | ||||
num_choices = 2 | num_choices = 2 | ||||
model = BertForMultipleChoice(config, num_choices, bert_dir) | |||||
model = BertForMultipleChoice(num_choices, config, bert_dir) | |||||
logits = model(input_ids, token_type_ids, input_mask) | logits = model(input_ids, token_type_ids, input_mask) | ||||
``` | ``` | ||||
""" | """ | ||||
def __init__(self, config, num_choices, bert_dir): | |||||
def __init__(self, num_choices, config=None, bert_dir=None): | |||||
super(BertForMultipleChoice, self).__init__() | super(BertForMultipleChoice, self).__init__() | ||||
self.num_choices = num_choices | self.num_choices = num_choices | ||||
self.bert = BertModel.from_pretrained(bert_dir) | |||||
if bert_dir is not None: | |||||
self.bert = BertModel.from_pretrained(bert_dir) | |||||
else: | |||||
if config is None: | |||||
config = BertConfig() | |||||
self.bert = BertModel(**config.__dict__) | |||||
self.dropout = nn.Dropout(config.hidden_dropout_prob) | self.dropout = nn.Dropout(config.hidden_dropout_prob) | ||||
self.classifier = nn.Linear(config.hidden_size, 1) | self.classifier = nn.Linear(config.hidden_size, 1) | ||||
@@ -174,14 +213,19 @@ class BertForTokenClassification(BaseModel): | |||||
num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) | num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) | ||||
num_labels = 2 | num_labels = 2 | ||||
bert_dir = 'your-bert-file-dir' | bert_dir = 'your-bert-file-dir' | ||||
model = BertForTokenClassification(config, num_labels, bert_dir) | |||||
model = BertForTokenClassification(num_labels, config, bert_dir) | |||||
logits = model(input_ids, token_type_ids, input_mask) | logits = model(input_ids, token_type_ids, input_mask) | ||||
``` | ``` | ||||
""" | """ | ||||
def __init__(self, config, num_labels, bert_dir): | |||||
def __init__(self, num_labels, config=None, bert_dir=None): | |||||
super(BertForTokenClassification, self).__init__() | super(BertForTokenClassification, self).__init__() | ||||
self.num_labels = num_labels | self.num_labels = num_labels | ||||
self.bert = BertModel.from_pretrained(bert_dir) | |||||
if bert_dir is not None: | |||||
self.bert = BertModel.from_pretrained(bert_dir) | |||||
else: | |||||
if config is None: | |||||
config = BertConfig() | |||||
self.bert = BertModel(**config.__dict__) | |||||
self.dropout = nn.Dropout(config.hidden_dropout_prob) | self.dropout = nn.Dropout(config.hidden_dropout_prob) | ||||
self.classifier = nn.Linear(config.hidden_size, num_labels) | self.classifier = nn.Linear(config.hidden_size, num_labels) | ||||
@@ -252,9 +296,14 @@ class BertForQuestionAnswering(BaseModel): | |||||
start_logits, end_logits = model(input_ids, token_type_ids, input_mask) | start_logits, end_logits = model(input_ids, token_type_ids, input_mask) | ||||
``` | ``` | ||||
""" | """ | ||||
def __init__(self, config, bert_dir): | |||||
def __init__(self, config=None, bert_dir=None): | |||||
super(BertForQuestionAnswering, self).__init__() | super(BertForQuestionAnswering, self).__init__() | ||||
self.bert = BertModel.from_pretrained(bert_dir) | |||||
if bert_dir is not None: | |||||
self.bert = BertModel.from_pretrained(bert_dir) | |||||
else: | |||||
if config is None: | |||||
config = BertConfig() | |||||
self.bert = BertModel(**config.__dict__) | |||||
# TODO check with Google if it's normal there is no dropout on the token classifier of SQuAD in the TF version | # TODO check with Google if it's normal there is no dropout on the token classifier of SQuAD in the TF version | ||||
# self.dropout = nn.Dropout(config.hidden_dropout_prob) | # self.dropout = nn.Dropout(config.hidden_dropout_prob) | ||||
self.qa_outputs = nn.Linear(config.hidden_size, 2) | self.qa_outputs = nn.Linear(config.hidden_size, 2) | ||||
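With the reordered constructors above, `num_labels`/`num_choices` comes first and both `config` and `bert_dir` are optional. A brief sketch (the BERT directory is hypothetical; `BertConfig()` defaults correspond to BERT-base, and when loading from `bert_dir` a config is still needed for the dropout and classifier sizes):

```python
# randomly initialised BERT with the default BertConfig() hyper-parameters
model = BertForSequenceClassification(num_labels=2)

# weights loaded from a local directory, config supplied explicitly
model = BertForSequenceClassification(2, config=BertConfig(), bert_dir='/path/to/bert')
```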
@@ -2,43 +2,28 @@ | |||||
这里复现了在fastNLP中实现的模型,旨在达到与论文中相符的性能。 | 这里复现了在fastNLP中实现的模型,旨在达到与论文中相符的性能。 | ||||
复现的模型有: | 复现的模型有: | ||||
- Star-Transformer | |||||
- [Star-Transformer](Star_transformer/) | |||||
- ... | - ... | ||||
# 任务复现 | |||||
## Text Classification (文本分类) | |||||
- still in progress | |||||
## Matching (自然语言推理/句子匹配) | |||||
- still in progress | |||||
## Sequence Labeling (序列标注) | |||||
- still in progress | |||||
## Coreference resolution (指代消解) | |||||
- still in progress | |||||
## Summarization (摘要) | |||||
- still in progress | |||||
## Star-Transformer | |||||
[reference](https://arxiv.org/abs/1902.09113) | |||||
### Performance (still in progress) | |||||
|任务| 数据集 | SOTA | 模型表现 | | |||||
|------|------| ------| ------| | |||||
|Pos Tagging|CTB 9.0|-|ACC 92.31| | |||||
|Pos Tagging|CONLL 2012|-|ACC 96.51| | |||||
|Named Entity Recognition|CONLL 2012|-|F1 85.66| | |||||
|Text Classification|SST|-|49.18| | |||||
|Natural Language Inference|SNLI|-|83.76| | |||||
### Usage | |||||
``` python | |||||
# for sequence labeling(ner, pos tagging, etc) | |||||
from fastNLP.models.star_transformer import STSeqLabel | |||||
model = STSeqLabel( | |||||
vocab_size=10000, num_cls=50, | |||||
emb_dim=300) | |||||
# for sequence classification | |||||
from fastNLP.models.star_transformer import STSeqCls | |||||
model = STSeqCls( | |||||
vocab_size=10000, num_cls=50, | |||||
emb_dim=300) | |||||
# for natural language inference | |||||
from fastNLP.models.star_transformer import STNLICls | |||||
model = STNLICls( | |||||
vocab_size=10000, num_cls=50, | |||||
emb_dim=300) | |||||
``` | |||||
## ... | ## ... |
@@ -0,0 +1,34 @@ | |||||
# Star-Transformer | |||||
paper: [Star-Transformer](https://arxiv.org/abs/1902.09113) | |||||
## Performance (still in progress) | |||||
|任务| 数据集 | SOTA | 模型表现 | | |||||
|------|------| ------| ------| | |||||
|Pos Tagging|CTB 9.0|-|ACC 92.31| | |||||
|Pos Tagging|CONLL 2012|-|ACC 96.51| | |||||
|Named Entity Recognition|CONLL 2012|-|F1 85.66| | |||||
|Text Classification|SST|-|49.18| | |||||
|Natural Language Inference|SNLI|-|83.76| | |||||
## Usage | |||||
``` python | |||||
# for sequence labeling(ner, pos tagging, etc) | |||||
from fastNLP.models.star_transformer import STSeqLabel | |||||
model = STSeqLabel( | |||||
vocab_size=10000, num_cls=50, | |||||
emb_dim=300) | |||||
# for sequence classification | |||||
from fastNLP.models.star_transformer import STSeqCls | |||||
model = STSeqCls( | |||||
vocab_size=10000, num_cls=50, | |||||
emb_dim=300) | |||||
# for natural language inference | |||||
from fastNLP.models.star_transformer import STNLICls | |||||
model = STNLICls( | |||||
vocab_size=10000, num_cls=50, | |||||
emb_dim=300) | |||||
``` |
@@ -0,0 +1,6 @@ | |||||
from fastNLP.io.dataset_loader import SNLILoader | |||||
# TODO: still in progress | |||||
@@ -0,0 +1,41 @@ | |||||
import torch | |||||
import torch.nn as nn | |||||
from fastNLP.core.const import Const | |||||
from fastNLP.models import BaseModel | |||||
from fastNLP.modules.encoder.bert import BertModel | |||||
class BertForNLI(BaseModel): | |||||
# TODO: still in progress | |||||
def __init__(self, class_num=3, bert_dir=None): | |||||
super(BertForNLI, self).__init__() | |||||
if bert_dir is not None: | |||||
self.bert = BertModel.from_pretrained(bert_dir) | |||||
else: | |||||
self.bert = BertModel() | |||||
hidden_size = self.bert.pooler.dense._parameters['bias'].size(-1) | |||||
self.classifier = nn.Linear(hidden_size, class_num) | |||||
def forward(self, words, seq_len1, seq_len2, target=None): | |||||
""" | |||||
:param torch.Tensor words: [batch_size, seq_len] input_ids | |||||
:param torch.Tensor seq_len1: [batch_size, seq_len] token_type_ids | |||||
:param torch.Tensor seq_len2: [batch_size, seq_len] attention_mask | |||||
:param torch.Tensor target: [batch] | |||||
:return: | |||||
""" | |||||
_, pooled_output = self.bert(words, seq_len1, seq_len2) | |||||
logits = self.classifier(pooled_output) | |||||
if target is not None: | |||||
loss_func = torch.nn.CrossEntropyLoss() | |||||
loss = loss_func(logits, target) | |||||
return {Const.OUTPUT: logits, Const.LOSS: loss} | |||||
return {Const.OUTPUT: logits} | |||||
def predict(self, words, seq_len1, seq_len2, target=None): | |||||
return self.forward(words, seq_len1, seq_len2) | |||||
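A shape-level sketch of calling the model; the tensor contents are dummies and the BERT directory is hypothetical. As the docstring notes, `seq_len1` actually carries the token_type_ids and `seq_len2` the attention mask.

```python
import torch

model = BertForNLI(class_num=3, bert_dir='/path/to/bert')
words = torch.randint(0, 30522, (2, 16))                 # [batch_size, seq_len] token ids
token_type_ids = torch.zeros(2, 16, dtype=torch.long)
attention_mask = torch.ones(2, 16, dtype=torch.long)
out = model(words, token_type_ids, attention_mask)       # {'pred': logits of shape [2, 3]}
```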
@@ -0,0 +1,97 @@ | |||||
import os | |||||
import torch | |||||
from fastNLP.core import Vocabulary, DataSet, Trainer, Tester, Const, Adam, AccuracyMetric | |||||
from reproduction.matching.data.SNLIDataLoader import SNLILoader | |||||
from legacy.component.bert_tokenizer import BertTokenizer | |||||
from reproduction.matching.model.bert import BertForNLI | |||||
def preprocess_data(data: DataSet, bert_dir): | |||||
""" | |||||
preprocess data set to bert-need data set. | |||||
:param data: | |||||
:param bert_dir: | |||||
:return: | |||||
""" | |||||
tokenizer = BertTokenizer.from_pretrained(os.path.join(bert_dir, 'vocab.txt')) | |||||
vocab = Vocabulary(padding=None, unknown=None) | |||||
with open(os.path.join(bert_dir, 'vocab.txt')) as f: | |||||
lines = f.readlines() | |||||
vocab_list = [] | |||||
for line in lines: | |||||
vocab_list.append(line.strip()) | |||||
vocab.add_word_lst(vocab_list) | |||||
vocab.build_vocab() | |||||
vocab.padding = '[PAD]' | |||||
vocab.unknown = '[UNK]' | |||||
for i in range(2): | |||||
data.apply(lambda x: tokenizer.tokenize(" ".join(x[Const.INPUTS(i)])), | |||||
new_field_name=Const.INPUTS(i)) | |||||
data.apply(lambda x: ['[CLS]'] + x[Const.INPUTS(0)] + ['[SEP]'] + x[Const.INPUTS(1)] + ['[SEP]'], | |||||
new_field_name=Const.INPUT) | |||||
data.apply(lambda x: [0] * (len(x[Const.INPUTS(0)]) + 2) + [1] * (len(x[Const.INPUTS(1)]) + 1), | |||||
new_field_name=Const.INPUT_LENS(0)) | |||||
data.apply(lambda x: [1] * len(x[Const.INPUT_LENS(0)]), new_field_name=Const.INPUT_LENS(1)) | |||||
max_len = 512 | |||||
data.apply(lambda x: x[Const.INPUT][: max_len], new_field_name=Const.INPUT) | |||||
data.apply(lambda x: [vocab.to_index(w) for w in x[Const.INPUT]], new_field_name=Const.INPUT) | |||||
data.apply(lambda x: x[Const.INPUT_LENS(0)][: max_len], new_field_name=Const.INPUT_LENS(0)) | |||||
data.apply(lambda x: x[Const.INPUT_LENS(1)][: max_len], new_field_name=Const.INPUT_LENS(1)) | |||||
target_vocab = Vocabulary(padding=None, unknown=None) | |||||
target_vocab.add_word_lst(['neutral', 'contradiction', 'entailment']) | |||||
target_vocab.build_vocab() | |||||
data.apply(lambda x: target_vocab.to_index(x[Const.TARGET]), new_field_name=Const.TARGET) | |||||
data.set_input(Const.INPUT, Const.INPUT_LENS(0), Const.INPUT_LENS(1), Const.TARGET) | |||||
data.set_target(Const.TARGET) | |||||
return data | |||||
bert_dirs = 'path/to/bert/dir' | |||||
# load raw data set | |||||
train_data = SNLILoader().load('./data/snli/snli_1.0_train.jsonl') | |||||
dev_data = SNLILoader().load('./data/snli/snli_1.0_dev.jsonl') | |||||
test_data = SNLILoader().load('./data/snli/snli_1.0_test.jsonl') | |||||
print('successfully load data sets!') | |||||
train_data = preprocess_data(train_data, bert_dirs) | |||||
dev_data = preprocess_data(dev_data, bert_dirs) | |||||
test_data = preprocess_data(test_data, bert_dirs) | |||||
model = BertForNLI(bert_dir=bert_dirs) | |||||
trainer = Trainer( | |||||
train_data=train_data, | |||||
model=model, | |||||
optimizer=Adam(lr=2e-5, model_params=model.parameters()), | |||||
batch_size=torch.cuda.device_count() * 12, | |||||
n_epochs=4, | |||||
print_every=-1, | |||||
dev_data=dev_data, | |||||
metrics=AccuracyMetric(), | |||||
metric_key='acc', | |||||
device=[i for i in range(torch.cuda.device_count())], | |||||
check_code_level=-1 | |||||
) | |||||
trainer.train(load_best_model=True) | |||||
tester = Tester( | |||||
data=test_data, | |||||
model=model, | |||||
metrics=AccuracyMetric(), | |||||
batch_size=torch.cuda.device_count() * 12, | |||||
device=[i for i in range(torch.cuda.device_count())], | |||||
) | |||||
tester.test() | |||||
@@ -0,0 +1,10 @@ | |||||
import unittest | |||||
from ..data.SNLIDataLoader import SNLILoader | |||||
from fastNLP.core.vocabulary import Vocabulary | |||||
class TestSNLIDataLoader(unittest.TestCase): | |||||
def test_case1(self): | |||||
snli_loader = SNLILoader() | |||||
# TODO: still in progress | |||||
@@ -0,0 +1,249 @@ | |||||
from fastNLP.io.embed_loader import EmbeddingOption, EmbedLoader | |||||
from fastNLP.core.vocabulary import VocabularyOption | |||||
from fastNLP.io.base_loader import DataSetLoader, DataInfo | |||||
from typing import Union, Dict, List, Iterator | |||||
from fastNLP import DataSet | |||||
from fastNLP import Instance | |||||
from fastNLP import Vocabulary | |||||
from fastNLP import Const | |||||
from reproduction.utils import check_dataloader_paths | |||||
from functools import partial | |||||
class SigHanLoader(DataSetLoader): | |||||
""" | |||||
任务相关的说明可以在这里找到http://sighan.cs.uchicago.edu/ | |||||
支持的数据格式为,一行一句,不同的word用空格隔开。如下例 | |||||
共同 创造 美好 的 新 世纪 —— 二○○一年 新年 | |||||
女士 们 , 先生 们 , 同志 们 , 朋友 们 : | |||||
读取sighan中的数据集,返回的DataSet将包含以下的内容fields: | |||||
raw_chars: list(str), 每个元素是一个汉字 | |||||
chars: list(int), 每个元素是一个index(汉字对应的index) | |||||
target: list(int), 根据不同的encoding_type会有不同的变化 | |||||
:param target_type: target的类型,当前支持以下的两种: "bmes", "shift_relay" | |||||
""" | |||||
def __init__(self, target_type:str): | |||||
super().__init__() | |||||
if target_type.lower() not in ('bmes', 'shift_relay'): | |||||
raise ValueError("target_type only supports 'bmes', 'shift_relay'.") | |||||
self.target_type = target_type | |||||
if target_type=='bmes': | |||||
self._word_len_to_target = self._word_len_to_bmes | |||||
elif target_type=='shift_relay': | |||||
self._word_len_to_target = self._word_lens_to_relay | |||||
@staticmethod | |||||
def _word_lens_to_relay(word_lens: Iterator[int]): | |||||
""" | |||||
[1, 2, 3, ..] 转换为[0, 1, 0, 2, 1, 0,](start指示seg有多长); | |||||
:param word_lens: | |||||
:return: {'target': , 'end_seg_mask':, 'start_seg_mask':} | |||||
""" | |||||
tags = [] | |||||
end_seg_mask = [] | |||||
start_seg_mask = [] | |||||
for word_len in word_lens: | |||||
tags.extend([idx for idx in range(word_len - 1, -1, -1)]) | |||||
end_seg_mask.extend([0] * (word_len - 1) + [1]) | |||||
start_seg_mask.extend([1] + [0] * (word_len - 1)) | |||||
return {'target': tags, 'end_seg_mask': end_seg_mask, 'start_seg_mask': start_seg_mask} | |||||
@staticmethod | |||||
def _word_len_to_bmes(word_lens: Iterator[int]) -> Dict[str, List[str]]: | |||||
""" | |||||
:param word_lens: 每个word的长度 | |||||
:return: {'target': List[str]},即 BMES 标签序列;例如 word_lens 为 [1, 3] 时返回 {'target': ['S', 'B', 'M', 'E']} | |||||
""" | |||||
tags = [] | |||||
for word_len in word_lens: | |||||
if word_len==1: | |||||
tags.append('S') | |||||
else: | |||||
tags.append('B') | |||||
for _ in range(word_len-2): | |||||
tags.append('M') | |||||
tags.append('E') | |||||
return {'target':tags} | |||||
@staticmethod | |||||
def _gen_bigram(chars:List[str])->List[str]: | |||||
""" | |||||
:param chars: list of str,每个元素是一个字 | |||||
:return: list of str,相邻两字拼成的 bigram,最后一个字与 '<eos>' 组成 bigram;例如 ['上', '海'] -> ['上海', '海<eos>'] | |||||
""" | |||||
return [c1+c2 for c1, c2 in zip(chars, chars[1:]+['<eos>'])] | |||||
def load(self, path:str, bigram:bool=False)->DataSet: | |||||
""" | |||||
:param path: str | |||||
:param bigram: 是否使用bigram feature | |||||
:return: | |||||
""" | |||||
dataset = DataSet() | |||||
with open(path, 'r', encoding='utf-8') as f: | |||||
for line in f: | |||||
line = line.strip() | |||||
if not line: # 去掉空行 | |||||
continue | |||||
parts = line.split() | |||||
word_lens = map(len, parts) | |||||
chars = list(''.join(parts)) | |||||
tags = self._word_len_to_target(word_lens) | |||||
assert len(chars)==len(tags['target']) | |||||
dataset.append(Instance(raw_chars=chars, **tags, seq_len=len(chars))) | |||||
if len(dataset)==0: | |||||
raise RuntimeError(f"{path} has no valid data.") | |||||
if bigram: | |||||
dataset.apply_field(self._gen_bigram, field_name='raw_chars', new_field_name='bigrams') | |||||
return dataset | |||||
def process(self, paths: Union[str, Dict[str, str]], char_vocab_opt:VocabularyOption=None, | |||||
char_embed_opt:EmbeddingOption=None, bigram_vocab_opt:VocabularyOption=None, | |||||
bigram_embed_opt:EmbeddingOption=None, L:int=4): | |||||
""" | |||||
支持的数据格式为一行一个sample,并且用空格隔开不同的词语。例如 | |||||
Option:: | |||||
共同 创造 美好 的 新 世纪 —— 二○○一年 新年 贺词 | |||||
( 二○○○年 十二月 三十一日 ) ( 附 图片 1 张 ) | |||||
女士 们 , 先生 们 , 同志 们 , 朋友 们 : | |||||
paths支持两种格式,第一种是str,第二种是Dict[str, str]. | |||||
Option:: | |||||
# 1. str类型 | |||||
# 1.1 传入具体的文件路径 | |||||
data = SigHanLoader('bmes').process('/path/to/cws/data.txt') # 将读取data.txt的内容 | |||||
# 包含以下的内容data.vocabs['chars']:Vocabulary对象, | |||||
# data.vocabs['target']: Vocabulary对象,根据encoding_type可能会没有该值 | |||||
# data.embeddings['chars']: Embedding对象. 只有提供了预训练的词向量的路径才有该项 | |||||
# data.datasets['train']: DataSet对象 | |||||
# 包含的field有: | |||||
# raw_chars: list[str], 每个元素是一个汉字 | |||||
# chars: list[int], 每个元素是汉字对应的index | |||||
# target: list[int], 根据encoding_type有对应的变化 | |||||
# 1.2 传入一个目录, 里面必须包含train.txt文件 | |||||
data = SigHanLoader('bmes').process('path/to/cws/') #将尝试在该目录下读取 train.txt, test.txt以及dev.txt | |||||
# 包含以下的内容data.vocabs['chars']: Vocabulary对象 | |||||
# data.vocabs['target']:Vocabulary对象 | |||||
# data.embeddings['chars']: 仅在提供了预训练embedding路径的情况下,为Embedding对象; | |||||
# data.datasets['train']: DataSet对象 | |||||
# 包含的field有: | |||||
# raw_chars: list[str], 每个元素是一个汉字 | |||||
# chars: list[int], 每个元素是汉字对应的index | |||||
# target: list[int], 根据encoding_type有对应的变化 | |||||
# data.datasets['dev']: DataSet对象,如果文件夹下包含了dev.txt;内容与data.datasets['train']一样 | |||||
# 2. dict类型, key是文件的名称,value是对应的读取路径. 必须包含'train'这个key | |||||
paths = {'train': '/path/to/train/train.txt', 'test':'/path/to/test/test.txt', 'dev':'/path/to/dev/dev.txt'} | |||||
data = SigHanLoader('bmes').process(paths) | |||||
# 结果与传入目录时是一致的,但是可以传入多个数据集。data.datasets中的key将与这里传入的一致 | |||||
:param paths: 支持传入目录,文件路径,以及dict。 | |||||
:param char_vocab_opt: 用于构建chars的vocabulary参数,默认为min_freq=2 | |||||
:param char_embed_opt: 用于读取chars的Embedding的参数,默认不读取pretrained的embedding | |||||
:param bigram_vocab_opt: 用于构建bigram的vocabulary参数,默认不使用bigram, 仅在指定该参数的情况下会带有bigrams这个field。 | |||||
为List[int], 每个instance长度与chars一样, abcde的bigram为ab bc cd de e<eos> | |||||
:param bigram_embed_opt: 用于读取预训练bigram的参数,仅在传入bigram_vocab_opt有效 | |||||
:param L: 当target_type为shift_relay时传入的segment长度 | |||||
:return: | |||||
""" | |||||
# 推荐大家使用 check_dataloader_paths 进行 paths 的验证 | |||||
paths = check_dataloader_paths(paths) | |||||
datasets = {} | |||||
data = DataInfo() | |||||
bigram = bigram_vocab_opt is not None | |||||
for name, path in paths.items(): | |||||
dataset = self.load(path, bigram=bigram) | |||||
datasets[name] = dataset | |||||
input_fields = [] | |||||
target_fields = [] | |||||
# 创建vocab | |||||
char_vocab = Vocabulary(min_freq=2) if char_vocab_opt is None else Vocabulary(**char_vocab_opt) | |||||
char_vocab.from_dataset(datasets['train'], field_name='raw_chars') | |||||
char_vocab.index_dataset(*datasets.values(), field_name='raw_chars', new_field_name='chars') | |||||
data.vocabs[Const.CHAR_INPUT] = char_vocab | |||||
input_fields.extend([Const.CHAR_INPUT, Const.INPUT_LEN, Const.TARGET]) | |||||
target_fields.append(Const.TARGET) | |||||
# 创建target | |||||
if self.target_type == 'bmes': | |||||
target_vocab = Vocabulary(unknown=None, padding=None) | |||||
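# assumption: adding the tags with decreasing repeat counts fixes their frequency order, so 'B','M','E','S' get indices 0..3 | |||||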
target_vocab.add_word_lst(['B']*4+['M']*3+['E']*2+['S']) | |||||
target_vocab.index_dataset(*datasets.values(), field_name='target') | |||||
data.vocabs[Const.TARGET] = target_vocab | |||||
if char_embed_opt is not None: | |||||
char_embed = EmbedLoader.load_with_vocab(**char_embed_opt, vocab=char_vocab) | |||||
data.embeddings['chars'] = char_embed | |||||
if bigram: | |||||
bigram_vocab = Vocabulary(**bigram_vocab_opt) | |||||
bigram_vocab.from_dataset(datasets['train'], field_name='bigrams') | |||||
bigram_vocab.index_dataset(*datasets.values(), field_name='bigrams') | |||||
data.vocabs['bigrams'] = bigram_vocab | |||||
if bigram_embed_opt is not None: | |||||
bigram_embed = EmbedLoader.load_with_vocab(**bigram_embed_opt, vocab=bigram_vocab) | |||||
data.embeddings['bigrams'] = bigram_embed | |||||
input_fields.append('bigrams') | |||||
if self.target_type == 'shift_relay': | |||||
func = partial(self._clip_target, L=L) | |||||
for name, dataset in datasets.items(): | |||||
res = dataset.apply_field(func, field_name='target') | |||||
relay_target = [res_i[0] for res_i in res] | |||||
relay_mask = [res_i[1] for res_i in res] | |||||
dataset.add_field('relay_target', relay_target, is_input=True, is_target=False, ignore_type=False) | |||||
dataset.add_field('relay_mask', relay_mask, is_input=True, is_target=False, ignore_type=False) | |||||
if self.target_type == 'shift_relay': | |||||
input_fields.extend(['end_seg_mask']) | |||||
target_fields.append('start_seg_mask') | |||||
# 将dataset加入DataInfo | |||||
for name, dataset in datasets.items(): | |||||
dataset.set_input(*input_fields) | |||||
dataset.set_target(*target_fields) | |||||
data.datasets[name] = dataset | |||||
return data | |||||
@staticmethod | |||||
def _clip_target(target:List[int], L:int): | |||||
""" | |||||
只有在target_type为shift_relay的使用 | |||||
:param target: List[int] | |||||
:param L: | |||||
:return: | |||||
""" | |||||
relay_target_i = [] | |||||
tmp = [] | |||||
for j in range(len(target) - 1): | |||||
tmp.append(target[j]) | |||||
if target[j] > target[j + 1]: | |||||
pass | |||||
else: | |||||
relay_target_i.extend([L - 1 if t >= L else t for t in tmp[::-1]]) | |||||
tmp = [] | |||||
# 处理未结束的部分 | |||||
if len(tmp) == 0: | |||||
relay_target_i.append(0) | |||||
else: | |||||
tmp.append(target[-1]) | |||||
relay_target_i.extend([L - 1 if t >= L else t for t in tmp[::-1]]) | |||||
relay_mask_i = [] | |||||
j = 0 | |||||
while j < len(target): | |||||
seg_len = target[j] + 1 | |||||
if target[j] < L: | |||||
relay_mask_i.extend([0] * (seg_len)) | |||||
else: | |||||
relay_mask_i.extend([1] * (seg_len - L) + [0] * L) | |||||
j = seg_len + j | |||||
return relay_target_i, relay_mask_i | |||||
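A hand-traced illustration of the shift_relay encodings above; the numbers follow directly from `_word_lens_to_relay` and `_clip_target` for a hypothetical sentence made of a 2-character word followed by a 3-character word.

```python
target = SigHanLoader._word_lens_to_relay([2, 3])['target']      # [1, 0, 2, 1, 0]: countdown to each segment end
relay_target, relay_mask = SigHanLoader._clip_target(target, L=3)
# relay_target == [0, 1, 0, 1, 2]: offset from the segment start, clipped at L - 1
# relay_mask   == [0, 0, 0, 0, 0]: no segment here is longer than L, so no relay positions
```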
@@ -0,0 +1,44 @@ | |||||
from fastNLP.core.metrics import MetricBase | |||||
class RelayMetric(MetricBase): | |||||
def __init__(self, pred=None, pred_mask=None, target=None, start_seg_mask=None): | |||||
super().__init__() | |||||
self._init_param_map(pred=pred, pred_mask=pred_mask, target=target, start_seg_mask=start_seg_mask) | |||||
self.tp = 0 | |||||
self.rec = 0 | |||||
self.pre = 0 | |||||
def evaluate(self, pred, pred_mask, target, start_seg_mask): | |||||
""" | |||||
给定每个batch,累计一下结果。 | |||||
:param pred: 预测的结果,为以当前位置开始的 segment 的(长度-1) | |||||
:param pred_mask: 当前位置预测有 segment 开始时为1 | |||||
:param target: 以当前位置开始的 gold segment 的(长度-1) | |||||
:param start_seg_mask: 当前位置有 gold segment 开始时为1 | |||||
:return: | |||||
""" | |||||
self.tp += ((pred.long().eq(target.long())).__and__(pred_mask.byte().__and__(start_seg_mask.byte()))).sum().item() | |||||
self.rec += start_seg_mask.sum().item() | |||||
self.pre += pred_mask.sum().item() | |||||
def get_metric(self, reset=True): | |||||
""" | |||||
在所有数据都计算结束之后,得到performance | |||||
:param reset: | |||||
:return: | |||||
""" | |||||
pre = self.tp/(self.pre + 1e-12) | |||||
rec = self.tp/(self.rec + 1e-12) | |||||
f = 2*pre*rec/(1e-12 + pre + rec) | |||||
if reset: | |||||
self.tp = 0 | |||||
self.rec = 0 | |||||
self.pre = 0 | |||||
self.bigger_than_L = 0 | |||||
return {'f': round(f, 6), 'pre': round(pre, 6), 'rec': round(rec, 6)} |
@@ -0,0 +1,74 @@ | |||||
from torch import nn | |||||
import torch | |||||
from fastNLP.modules import Embedding | |||||
import numpy as np | |||||
from reproduction.seqence_labelling.cws.model.module import FeatureFunMax, SemiCRFShiftRelay | |||||
from fastNLP.modules import LSTM | |||||
class ShiftRelayCWSModel(nn.Module): | |||||
""" | |||||
该模型可以用于进行分词操作 | |||||
包含两个方法, | |||||
forward(chars, bigrams, seq_len) -> {'loss': batch_size,} | |||||
predict(chars, bigrams) -> {'pred': batch_size x max_len, 'pred_mask': batch_size x max_len} | |||||
pred是对当前segment的长度预测,pred_mask是仅在有预测的位置为1 | |||||
:param char_embed: 预训练的Embedding或者embedding的shape | |||||
:param bigram_embed: 预训练的Embedding或者embedding的shape | |||||
:param hidden_size: LSTM的隐藏层大小 | |||||
:param num_layers: LSTM的层数 | |||||
:param L: SemiCRFShiftRelay的segment大小 | |||||
:param num_bigram_per_char: 每个character对应的bigram的数量 | |||||
:param drop_p: Dropout的大小 | |||||
""" | |||||
def __init__(self, char_embed:Embedding, bigram_embed:Embedding, hidden_size:int=400, num_layers:int=1, | |||||
L:int=6, num_bigram_per_char:int=1, drop_p:float=0.2): | |||||
super().__init__() | |||||
self.char_embedding = Embedding(char_embed, dropout=drop_p) | |||||
self._pretrained_embed = False | |||||
if isinstance(char_embed, np.ndarray): | |||||
self._pretrained_embed = True | |||||
self.bigram_embedding = Embedding(bigram_embed, dropout=drop_p) | |||||
self.lstm = LSTM(100 * (num_bigram_per_char + 1), hidden_size // 2, num_layers=num_layers, bidirectional=True, | |||||
batch_first=True) | |||||
self.feature_fn = FeatureFunMax(hidden_size, L) | |||||
self.semi_crf_relay = SemiCRFShiftRelay(L) | |||||
self.feat_drop = nn.Dropout(drop_p) | |||||
self.reset_param() | |||||
# self.feature_fn.reset_parameters() | |||||
def reset_param(self): | |||||
for name, param in self.named_parameters(): | |||||
if 'embedding' in name and self._pretrained_embed: | |||||
continue | |||||
if 'bias_hh' in name: | |||||
nn.init.constant_(param, 0) | |||||
elif 'bias_ih' in name: | |||||
nn.init.constant_(param, 1) | |||||
elif len(param.size()) < 2: | |||||
nn.init.uniform_(param, -0.1, 0.1) | |||||
else: | |||||
nn.init.xavier_uniform_(param) | |||||
def get_feats(self, chars, bigrams, seq_len): | |||||
batch_size, max_len = chars.size() | |||||
chars = self.char_embedding(chars) | |||||
bigrams = self.bigram_embedding(bigrams) | |||||
bigrams = bigrams.view(bigrams.size(0), max_len, -1) | |||||
chars = torch.cat([chars, bigrams], dim=-1) | |||||
feats, _ = self.lstm(chars, seq_len) | |||||
feats = self.feat_drop(feats) | |||||
logits, relay_logits = self.feature_fn(feats) | |||||
return logits, relay_logits | |||||
def forward(self, chars, bigrams, relay_target, relay_mask, end_seg_mask, seq_len): | |||||
logits, relay_logits = self.get_feats(chars, bigrams, seq_len) | |||||
loss = self.semi_crf_relay(logits, relay_logits, relay_target, relay_mask, end_seg_mask, seq_len) | |||||
return {'loss':loss} | |||||
def predict(self, chars, bigrams, seq_len): | |||||
logits, relay_logits = self.get_feats(chars, bigrams, seq_len) | |||||
pred, pred_mask = self.semi_crf_relay.predict(logits, relay_logits, seq_len) | |||||
return {'pred': pred, 'pred_mask': pred_mask} | |||||
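A construction sketch; per the docstring, the embedding arguments may be pre-trained arrays or plain shapes, and the sizes here are hypothetical (the LSTM input size in `__init__` assumes 100-dimensional character and bigram embeddings).

```python
model = ShiftRelayCWSModel(char_embed=(4000, 100), bigram_embed=(60000, 100),
                           hidden_size=400, num_layers=1, L=4,
                           num_bigram_per_char=1, drop_p=0.2)
# forward(chars, bigrams, relay_target, relay_mask, end_seg_mask, seq_len) -> {'loss': ...}
# predict(chars, bigrams, seq_len) -> {'pred': ..., 'pred_mask': ...}
```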
@@ -0,0 +1,198 @@ | |||||
from torch import nn | |||||
import torch | |||||
from fastNLP.modules import Embedding | |||||
import numpy as np | |||||
class SemiCRFShiftRelay(nn.Module): | |||||
""" | |||||
该模块是一个 decoder,实现了带 relay node 的 semi-CRF:forward 计算 loss,predict 进行解码。 | |||||
""" | |||||
def __init__(self, L): | |||||
""" | |||||
:param L: 不包含relay的长度 | |||||
""" | |||||
if L<2: | |||||
raise RuntimeError() | |||||
super().__init__() | |||||
self.L = L | |||||
def forward(self, logits, relay_logits, relay_target, relay_mask, end_seg_mask, seq_len): | |||||
""" | |||||
relay node是接下来L个字都不是它的结束。relay的状态是往后滑动1个位置 | |||||
:param logits: batch_size x max_len x L, 当前位置往左边L个segment的分数,最后一维的0是长度为1的segment(即本身) | |||||
:param relay_logits: batch_size x max_len, 当前位置是接下来L-1个位置都不是终点的分数 | |||||
:param relay_target: batch_size x max_len 每个位置他的segment在哪里开始的。如果超过L,则一直保持为L-1。比如长度为 | |||||
5的词,L=3, [0, 1, 2, 2, 2] | |||||
:param relay_mask: batch_size x max_len, 在需要relay的地方为1, 长度为5的词, L=3时,为[1, 1, 1, 0, 0] | |||||
:param end_seg_mask: batch_size x max_len, segment结束的地方为1。 | |||||
:param seq_len: batch_size, 句子的长度 | |||||
:return: loss: batch_size, | |||||
""" | |||||
batch_size, max_len, L = logits.size() | |||||
# 当前时刻为relay node的分数是多少 | |||||
relay_scores = logits.new_zeros(batch_size, max_len) | |||||
# 当前时刻结束的分数是多少 | |||||
scores = logits.new_zeros(batch_size, max_len+1) | |||||
# golden的分数 | |||||
gold_scores = relay_logits[:, 0].masked_fill(relay_mask[:, 0].eq(0), 0) + \ | |||||
logits[:, 0, 0].masked_fill(end_seg_mask[:, 0].eq(0), 0) | |||||
# 初始化 | |||||
scores[:, 1] = logits[:, 0, 0] | |||||
batch_i = torch.arange(batch_size).to(logits.device).long() | |||||
relay_scores[:, 0] = relay_logits[:, 0] | |||||
last_relay_index = max_len - self.L | |||||
for t in range(1, max_len): | |||||
real_L = min(t+1, L) | |||||
flip_logits_t = logits[:, t, :real_L].flip(dims=[1]) # flip之后第0个位置为real_L-1的segment | |||||
# 计算relay_scores的更新 | |||||
if t<last_relay_index: | |||||
# (1) 从正常位置跳转 | |||||
tmp1 = relay_logits[:, t] + scores[:, t] # batch_size | |||||
# (2) 从relay跳转 | |||||
tmp2 = relay_logits[:, t] + relay_scores[:, t-1] # batch_size | |||||
tmp1 = torch.stack([tmp1, tmp2], dim=0) | |||||
relay_scores[:, t] = torch.logsumexp(tmp1, dim=0) | |||||
# 计算scores的更新 | |||||
# (1)从之前的位置跳转过来的 | |||||
tmp1 = scores[:, t-real_L+1:t+1] + flip_logits_t # batch_size x L | |||||
if t>self.L-1: | |||||
# (2)从relay跳转过来的 | |||||
tmp2 = relay_scores[:, t-self.L] # batch_size | |||||
tmp2 = tmp2 + flip_logits_t[:, 0] # batch_size | |||||
tmp1 = torch.cat([tmp1, tmp2.unsqueeze(-1)], dim=-1) | |||||
scores[:, t+1] = torch.logsumexp(tmp1, dim=-1) # 更新当前时刻的分数 | |||||
# 计算golden | |||||
seg_i = relay_target[:, t] # batch_size | |||||
gold_segment_scores = logits[:, t][(batch_i, seg_i)].masked_fill(end_seg_mask[:, t].eq(0), 0) # batch_size, 后向从0到L长度的segment的分数 | |||||
relay_score = relay_logits[:, t].masked_fill(relay_mask[:, t].eq(0), 0) | |||||
gold_scores = gold_scores + relay_score + gold_segment_scores | |||||
all_scores = scores.gather(dim=1, index=seq_len.unsqueeze(1)).squeeze(1) # batch_size | |||||
return all_scores - gold_scores | |||||
def predict(self, logits, relay_logits, seq_len): | |||||
""" | |||||
relay node是接下来L个字都不是它的结束。relay的状态是往后滑动L-1个位置 | |||||
:param logits: batch_size x max_len x L, 当前位置左边L个segment的分数,最后一维的0是长度为1的segment(即本身) | |||||
:param relay_logits: batch_size x max_len, 当前位置是接下来L-1个位置都不是终点的分数 | |||||
:param seq_len: batch_size, 句子的长度 | |||||
:return: pred: batch_size x max_len以该点开始的segment的(长度-1); pred_mask为1的地方预测有segment开始 | |||||
""" | |||||
batch_size, max_len, L = logits.size() | |||||
# 当前时刻为relay node的分数是多少 | |||||
max_relay_scores = logits.new_zeros(batch_size, max_len) | |||||
relay_bt = seq_len.new_zeros(batch_size, max_len) # 当前结果是否来自于relay的结果 | |||||
# 当前时刻结束的分数是多少 | |||||
max_scores = logits.new_zeros(batch_size, max_len+1) | |||||
bt = seq_len.new_zeros(batch_size, max_len) | |||||
# 初始化 | |||||
max_scores[:, 1] = logits[:, 0, 0] | |||||
max_relay_scores[:, 0] = relay_logits[:, 0] | |||||
last_relay_index = max_len - self.L | |||||
for t in range(1, max_len): | |||||
real_L = min(t+1, L) | |||||
flip_logits_t = logits[:, t, :real_L].flip(dims=[1]) # flip之后第0个位置为real_L-1的segment | |||||
# 计算relay_scores的更新 | |||||
if t<last_relay_index: | |||||
# (1) 从正常位置跳转 | |||||
tmp1 = relay_logits[:, t] + max_scores[:, t] | |||||
# (2) 从relay跳转 | |||||
tmp2 = relay_logits[:, t] + max_relay_scores[:, t-1] # batch_size | |||||
# 每个sample的倒数L位不能是relay了 | |||||
tmp2 = tmp2.masked_fill(seq_len.le(t+L), float('-inf')) | |||||
mask_i = tmp1.lt(tmp2) # 为1的位置为relay跳转 | |||||
relay_bt[:, t].masked_fill_(mask_i, 1) | |||||
max_relay_scores[:, t] = torch.max(tmp1, tmp2) | |||||
# 计算scores的更新 | |||||
# (1)从之前的位置跳转过来的 | |||||
tmp1 = max_scores[:, t-real_L+1:t+1] + flip_logits_t # batch_size x L | |||||
tmp1 = tmp1.flip(dims=[1]) # 0的位置代表长度为1的segment | |||||
if self.L-1<t: | |||||
# (2)从relay跳转过来的 | |||||
tmp2 = max_relay_scores[:, t-self.L] # batch_size | |||||
tmp2 = tmp2 + flip_logits_t[:, 0] | |||||
tmp1 = torch.cat([tmp1, tmp2.unsqueeze(-1)], dim=-1) | |||||
# 看哪个更大 | |||||
max_score, pt = torch.max(tmp1, dim=1) | |||||
max_scores[:, t+1] = max_score | |||||
# mask_i = pt.ge(self.L) | |||||
bt[:, t] = pt # 假设L=3, 那么对于0,1,2,3分别代表的是[t, t], [t-1, t], [t-2, t], [t-self.L(relay), t] | |||||
# 需要把结果decode出来 | |||||
pred = np.zeros((batch_size, max_len), dtype=int) | |||||
pred_mask = np.zeros((batch_size, max_len), dtype=int) | |||||
seq_len = seq_len.tolist() | |||||
bt = bt.tolist() | |||||
relay_bt = relay_bt.tolist() | |||||
for b in range(batch_size): | |||||
seq_len_i = seq_len[b] | |||||
bt_i = bt[b][:seq_len_i] | |||||
relay_bt_i = relay_bt[b][:seq_len_i] | |||||
j = seq_len_i - 1 | |||||
assert relay_bt_i[j]!=1 | |||||
while j>-1: | |||||
if bt_i[j]==self.L: | |||||
seg_start_pos = j | |||||
j = j-self.L | |||||
while relay_bt_i[j]!=0 and j>-1: | |||||
j = j - 1 | |||||
pred[b, j] = seg_start_pos - j | |||||
pred_mask[b, j] = 1 | |||||
else: | |||||
length = bt_i[j] | |||||
j = j - bt_i[j] | |||||
pred_mask[b, j] = 1 | |||||
pred[b, j] = length | |||||
j = j - 1 | |||||
return torch.LongTensor(pred).to(logits.device), torch.LongTensor(pred_mask).to(logits.device) | |||||
class FeatureFunMax(nn.Module): | |||||
def __init__(self, hidden_size:int, L:int): | |||||
""" | |||||
用于计算semi-CRF特征的函数。给定batch_size x max_len x hidden_size形状的输入,输出为batch_size x max_len x L的 | |||||
分数,以及batch_size x max_len的relay的分数。两者的区别参考论文 TODO 补充 | |||||
:param hidden_size: 输入特征的维度大小 | |||||
:param L: 不包含relay node的segment的长度大小。 | |||||
""" | |||||
super().__init__() | |||||
self.end_fc = nn.Linear(hidden_size, 1, bias=False) | |||||
self.whole_w = nn.Parameter(torch.randn(L, hidden_size)) | |||||
self.relay_fc = nn.Linear(hidden_size, 1) | |||||
self.length_bias = nn.Parameter(torch.randn(L)) | |||||
self.L = L | |||||
def forward(self, logits): | |||||
""" | |||||
:param logits: batch_size x max_len x hidden_size | |||||
:return: batch_size x max_len x L # 最后一维为左边segment的分数,0处为长度为1的segment | |||||
batch_size x max_len, # 当前位置是接下来L-1个位置都不是终点的分数 | |||||
""" | |||||
batch_size, max_len, hidden_size = logits.size() | |||||
# start_scores = self.start_fc(logits) # batch_size x max_len x 1 # 每个位置作为start的分数 | |||||
tmp = logits.new_zeros(batch_size, max_len+self.L-1, hidden_size) | |||||
tmp[:, -max_len:] = logits | |||||
# batch_size x max_len x hidden_size x (self.L) -> batch_size x max_len x (self.L) x hidden_size | |||||
start_logits = tmp.unfold(dimension=1, size=self.L, step=1).transpose(2, 3).flip(dims=[2]) | |||||
end_scores = self.end_fc(logits) # batch_size x max_len x 1 | |||||
# 计算relay的特征 | |||||
relay_tmp = logits.new_zeros(batch_size, max_len, hidden_size) | |||||
relay_tmp[:, :-self.L] = logits[:, self.L:] | |||||
# batch_size x max_len x hidden_size | |||||
relay_logits_max = torch.max(relay_tmp, logits) # end - start | |||||
logits_max = torch.max(logits.unsqueeze(2), start_logits) # batch_size x max_len x L x hidden_size | |||||
whole_scores = (logits_max*self.whole_w).sum(dim=-1) # batch_size x max_len x self.L | |||||
# whole_scores = self.whole_fc().squeeze(-1) # bz x max_len x self.L | |||||
# batch_size x max_len | |||||
relay_scores = self.relay_fc(relay_logits_max).squeeze(-1) | |||||
return whole_scores+end_scores+self.length_bias.view(1, 1, -1), relay_scores |
@@ -0,0 +1,17 @@ | |||||
import unittest | |||||
from ..data.CWSDataLoader import SigHanLoader | |||||
from fastNLP.core.vocabulary import VocabularyOption | |||||
class TestCWSDataLoader(unittest.TestCase): | |||||
def test_case1(self): | |||||
cws_loader = SigHanLoader(target_type='bmes') | |||||
data = cws_loader.process('pku_demo.txt') | |||||
print(data.datasets) | |||||
def test_case2(self): | |||||
cws_loader = SigHanLoader(target_type='bmes') | |||||
data = cws_loader.process('pku_demo.txt', bigram_vocab_opt=VocabularyOption()) | |||||
print(data.datasets) |
@@ -0,0 +1,68 @@ | |||||
import os | |||||
from fastNLP import cache_results | |||||
from reproduction.seqence_labelling.cws.data.CWSDataLoader import SigHanLoader | |||||
from reproduction.seqence_labelling.cws.model.model import ShiftRelayCWSModel | |||||
from fastNLP.io.embed_loader import EmbeddingOption | |||||
from fastNLP.core.vocabulary import VocabularyOption | |||||
from fastNLP import Trainer | |||||
from torch.optim import Adam | |||||
from fastNLP import BucketSampler | |||||
from fastNLP import GradientClipCallback | |||||
from reproduction.seqence_labelling.cws.model.metric import RelayMetric | |||||
# use fastNLP's automatic caching mechanism here; note it can only cache results smaller than 4GB | |||||
@cache_results(None) | |||||
def prepare_data(): | |||||
data = SigHanLoader(target_type='shift_relay').process(file_dir, char_embed_opt=char_embed_opt, | |||||
bigram_vocab_opt=bigram_vocab_opt, | |||||
bigram_embed_opt=bigram_embed_opt, | |||||
L=L) | |||||
return data | |||||
#########hyper | |||||
L = 4 | |||||
hidden_size = 200 | |||||
num_layers = 1 | |||||
drop_p = 0.2 | |||||
lr = 0.02 | |||||
#########hyper | |||||
device = 0 | |||||
# !!!! Never put absolute paths here, because they would expose your username on the server, which is risky. Always use relative paths; | |||||
# ideally put the data under your reproduction directory and add it to .gitignore | |||||
file_dir = '/path/to/pku' | |||||
char_embed_path = '/path/to/1grams_t3_m50_corpus.txt' | |||||
bigram_embed_path = '/path/to/2grams_t3_m50_corpus.txt' | |||||
bigram_vocab_opt = VocabularyOption(min_freq=3) | |||||
char_embed_opt = EmbeddingOption(embed_filepath=char_embed_path) | |||||
bigram_embed_opt = EmbeddingOption(embed_filepath=bigram_embed_path) | |||||
data_name = os.path.basename(file_dir) | |||||
cache_fp = 'caches/{}.pkl'.format(data_name) | |||||
data = prepare_data(_cache_fp=cache_fp, _refresh=False) | |||||
model = ShiftRelayCWSModel(char_embed=data.embeddings['chars'], bigram_embed=data.embeddings['bigrams'], | |||||
hidden_size=hidden_size, num_layers=num_layers, | |||||
L=L, num_bigram_per_char=1, drop_p=drop_p) | |||||
sampler = BucketSampler(batch_size=32) | |||||
optimizer = Adam(model.parameters(), lr=lr) | |||||
clipper = GradientClipCallback(clip_value=5, clip_type='value') | |||||
callbacks = [clipper] | |||||
# if pretrain: | |||||
# fixer = FixEmbedding([model.char_embedding, model.bigram_embedding], fix_until=fix_until) | |||||
# callbacks.append(fixer) | |||||
trainer = Trainer(data.datasets['train'], model, optimizer=optimizer, loss=None, | |||||
batch_size=32, sampler=sampler, update_every=5, | |||||
n_epochs=3, print_every=5, | |||||
dev_data=data.datasets['dev'], metrics=RelayMetric(), metric_key='f', | |||||
validate_every=-1, save_path=None, | |||||
prefetch=True, use_tqdm=True, device=device, | |||||
callbacks=callbacks, | |||||
check_code_level=0) | |||||
trainer.train() |
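The script relies on fastNLP's cache_results decorator: the first call executes prepare_data and pickles the returned result to the path given through _cache_fp, and later calls reload that pickle unless _refresh=True. A minimal standalone sketch of the same pattern (the function and cache file below are purely illustrative):

from fastNLP import cache_results

@cache_results(None)  # the cache path is supplied at call time through _cache_fp
def build_data(n):
    # stand-in for an expensive preprocessing step
    return list(range(n))

data = build_data(10, _cache_fp='caches/demo.pkl', _refresh=False)  # computes and writes the cache
data = build_data(10, _cache_fp='caches/demo.pkl', _refresh=False)  # reads the cached result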
@@ -0,0 +1,68 @@ | |||||
import ast | |||||
from fastNLP import DataSet, Instance, Vocabulary | |||||
from fastNLP.core.vocabulary import VocabularyOption | |||||
from fastNLP.io import JsonLoader | |||||
from fastNLP.io.base_loader import DataInfo | |||||
from fastNLP.io.embed_loader import EmbeddingOption | |||||
from fastNLP.io.file_reader import _read_json | |||||
from typing import Union, Dict | |||||
from reproduction.Star_transformer.datasets import EmbedLoader | |||||
from reproduction.utils import check_dataloader_paths | |||||
class yelpLoader(JsonLoader): | |||||
""" | |||||
Loads the Yelp dataset. The resulting DataSet contains the fields: | |||||
review_id: str, 22 character unique review id | |||||
user_id: str, 22 character unique user id | |||||
business_id: str, 22 character business id | |||||
useful: int, number of useful votes received | |||||
funny: int, number of funny votes received | |||||
cool: int, number of cool votes received | |||||
date: str, date formatted YYYY-MM-DD | |||||
words: list(str), the text to be classified | |||||
target: str, the label of the text | |||||
Data source: https://www.yelp.com/dataset/download | |||||
:param fine_grained: whether to use the fine-grained five-class labels (SST-5 style); if ``False``, collapse to the coarse scheme (SST-2 style). Default: ``False`` | |||||
""" | |||||
def __init__(self, fine_grained=False): | |||||
super(yelpLoader, self).__init__() | |||||
tag_v = {'1.0': 'very negative', '2.0': 'negative', '3.0': 'neutral', | |||||
'4.0': 'positive', '5.0': 'very positive'} | |||||
if not fine_grained: | |||||
tag_v['1.0'] = tag_v['2.0'] | |||||
tag_v['5.0'] = tag_v['4.0'] | |||||
self.fine_grained = fine_grained | |||||
self.tag_v = tag_v | |||||
def _load(self, path): | |||||
ds = DataSet() | |||||
for idx, d in _read_json(path, fields=self.fields_list, dropna=self.dropna): | |||||
d = ast.literal_eval(d) | |||||
d["words"] = d.pop("text").split() | |||||
d["target"] = self.tag_v[str(d.pop("stars"))] | |||||
ds.append(Instance(**d)) | |||||
return ds | |||||
def process(self, paths: Union[str, Dict[str, str]], vocab_opt: VocabularyOption = None, | |||||
embed_opt: EmbeddingOption = None): | |||||
paths = check_dataloader_paths(paths) | |||||
datasets = {} | |||||
info = DataInfo() | |||||
vocab = Vocabulary(min_freq=2) if vocab_opt is None else Vocabulary(**vocab_opt) | |||||
for name, path in paths.items(): | |||||
dataset = self.load(path) | |||||
datasets[name] = dataset | |||||
vocab.from_dataset(dataset, field_name="words") | |||||
info.vocabs = vocab | |||||
info.datasets = datasets | |||||
if embed_opt is not None: | |||||
embed = EmbedLoader.load_with_vocab(**embed_opt, vocab=vocab) | |||||
info.embeddings['words'] = embed | |||||
return info | |||||
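A rough usage sketch for the loader above (the json file names are placeholders; only the process() behaviour shown in this diff is assumed):

from reproduction.text_classification.data.yelpLoader import yelpLoader

loader = yelpLoader(fine_grained=False)
# paths may be a single json file or a dict of splits; a dict must contain 'train'
info = loader.process({'train': 'yelp_train.json', 'dev': 'yelp_dev.json'})
print(info.datasets['dev'])   # DataSet with 'words' and 'target' fields
print(info.vocabs)            # vocabulary built over the 'words' field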
@@ -0,0 +1 @@ | |||||
# TODO |
@@ -0,0 +1 @@ | |||||
# TODO |
@@ -0,0 +1 @@ | |||||
# TODO |
@@ -0,0 +1,20 @@ | |||||
"{\"review_id\":\"Q1sbwvVQXV2734tPgoKj4Q\",\"user_id\":\"hG7b0MtEbXx5QzbzE6C_VA\",\"business_id\":\"ujmEBvifdJM6h6RLv4wQIg\",\"stars\":1.0,\"useful\":6,\"funny\":1,\"cool\":0,\"text\":\"Total bill for this horrible service? Over $8Gs. These crooks actually had the nerve to charge us $69 for 3 pills. I checked online the pills can be had for 19 cents EACH! Avoid Hospital ERs at all costs.\",\"date\":\"2013-05-07 04:34:36\"}\n" | |||||
"{\"review_id\":\"GJXCdrto3ASJOqKeVWPi6Q\",\"user_id\":\"yXQM5uF2jS6es16SJzNHfg\",\"business_id\":\"NZnhc2sEQy3RmzKTZnqtwQ\",\"stars\":5.0,\"useful\":0,\"funny\":0,\"cool\":0,\"text\":\"I *adore* Travis at the Hard Rock's new Kelly Cardenas Salon! I'm always a fan of a great blowout and no stranger to the chains that offer this service; however, Travis has taken the flawless blowout to a whole new level! \\n\\nTravis's greets you with his perfectly green swoosh in his otherwise perfectly styled black hair and a Vegas-worthy rockstar outfit. Next comes the most relaxing and incredible shampoo -- where you get a full head message that could cure even the very worst migraine in minutes --- and the scented shampoo room. Travis has freakishly strong fingers (in a good way) and use the perfect amount of pressure. That was superb! Then starts the glorious blowout... where not one, not two, but THREE people were involved in doing the best round-brush action my hair has ever seen. The team of stylists clearly gets along extremely well, as it's evident from the way they talk to and help one another that it's really genuine and not some corporate requirement. It was so much fun to be there! \\n\\nNext Travis started with the flat iron. The way he flipped his wrist to get volume all around without over-doing it and making me look like a Texas pagent girl was admirable. It's also worth noting that he didn't fry my hair -- something that I've had happen before with less skilled stylists. At the end of the blowout & style my hair was perfectly bouncey and looked terrific. The only thing better? That this awesome blowout lasted for days! \\n\\nTravis, I will see you every single time I'm out in Vegas. You make me feel beauuuutiful!\",\"date\":\"2017-01-14 21:30:33\"}\n" | |||||
"{\"review_id\":\"2TzJjDVDEuAW6MR5Vuc1ug\",\"user_id\":\"n6-Gk65cPZL6Uz8qRm3NYw\",\"business_id\":\"WTqjgwHlXbSFevF32_DJVw\",\"stars\":5.0,\"useful\":3,\"funny\":0,\"cool\":0,\"text\":\"I have to say that this office really has it together, they are so organized and friendly! Dr. J. Phillipp is a great dentist, very friendly and professional. The dental assistants that helped in my procedure were amazing, Jewel and Bailey helped me to feel comfortable! I don't have dental insurance, but they have this insurance through their office you can purchase for $80 something a year and this gave me 25% off all of my dental work, plus they helped me get signed up for care credit which I knew nothing about before this visit! I highly recommend this office for the nice synergy the whole office has!\",\"date\":\"2016-11-09 20:09:03\"}\n" | |||||
"{\"review_id\":\"yi0R0Ugj_xUx_Nek0-_Qig\",\"user_id\":\"dacAIZ6fTM6mqwW5uxkskg\",\"business_id\":\"ikCg8xy5JIg_NGPx-MSIDA\",\"stars\":5.0,\"useful\":0,\"funny\":0,\"cool\":0,\"text\":\"Went in for a lunch. Steak sandwich was delicious, and the Caesar salad had an absolutely delicious dressing, with a perfect amount of dressing, and distributed perfectly across each leaf. I know I'm going on about the salad ... But it was perfect.\\n\\nDrink prices were pretty good.\\n\\nThe Server, Dawn, was friendly and accommodating. Very happy with her.\\n\\nIn summation, a great pub experience. Would go again!\",\"date\":\"2018-01-09 20:56:38\"}\n" | |||||
"{\"review_id\":\"11a8sVPMUFtaC7_ABRkmtw\",\"user_id\":\"ssoyf2_x0EQMed6fgHeMyQ\",\"business_id\":\"b1b1eb3uo-w561D0ZfCEiQ\",\"stars\":1.0,\"useful\":7,\"funny\":0,\"cool\":0,\"text\":\"Today was my second out of three sessions I had paid for. Although my first session went well, I could tell Meredith had a particular enjoyment for her male clients over her female. However, I returned because she did my teeth fine and I was pleased with the results. When I went in today, I was in the whitening room with three other gentlemen. My appointment started out well, although, being a person who is in the service industry, I always attend to my female clientele first when a couple arrives. Unbothered by those signs, I waited my turn. She checked on me once after my original 30 minute timer to ask if I was ok. She attended my boyfriend on numerous occasions, as well as the other men, and would exit the room without even asking me or looking to see if I had any irritation. Half way through, another woman had showed up who she was explaining the deals to in the lobby. While she admits timers must be reset half way through the process, she reset my boyfriends, left, rest the gentleman furthest away from me who had time to come in, redeem his deal, get set, and gave his timer done, before me, then left, and at this point my time was at 10 minutes. So, she should have reset it 5 minutes ago, according to her. While I sat there patiently this whole time with major pain in my gums, i watched the time until the lamp shut off. Not only had she reset two others, explained deals to other guest, but she never once checked on my time. When my light turned off, I released the stance of my mouth to a more relaxed state, assuming I was only getting a thirty minute session instead of the usual 45, because she had yet to come in. At this point, the teeth formula was not only burning the gum she neglected for 25 minutes now, but it began to burn my lips. I began squealing and slapping my chair trying to get her attention from the other room in a panic. I was in so much pain, that by the time she entered the room I was already out of my chair. She finally then acknowledged me, and asked if she could put vitamin E on my gum burn (pictured below). At this point, she has treated two other gums burns, while neglecting me, and I was so irritated that I had to suffer, all I wanted was to leave. While I waited for my boyfriend, she kept harassing me about the issue. Saying, \\\"well burns come with teeth whitening.\\\" While I totally agree, and under justifiable circumstances would not be as irritate, it could have easily been avoid if she had checked on me even a second time, so I could let her know. Not only did she never check on my physical health, she couldn't even take two seconds to reset the timer, which she even admitted to me. Her accuse was that she was coming in to do it, but I had the light off for a solid two minutes before I couldn't stand the pain. She admitted it should be reset every 15 minutes, which means for 25 minutes she did not bother to help me at all. Her guest in the lobby then proceeded to attack me as well, simply because I wanted to leave after the way I was treated. I also expected a refund for not getting a complete session today, due to the neglect, and the fact I won't be returning for my last, she had failed to do that. She was even screaming from the door, and continued to until my boyfriend and I were down the steps. 
I have never in my life been more appalled by a grown woman's behavior, who claims to be in the business for \\\"10 years.\\\" Admit your wrongs, but don't make your guest feel unwelcome because you can't do you job properly.\",\"date\":\"2018-01-30 23:07:38\"}\n" | |||||
"{\"review_id\":\"fdiNeiN_hoCxCMy2wTRW9g\",\"user_id\":\"w31MKYsNFMrjhWxxAb5wIw\",\"business_id\":\"eU_713ec6fTGNO4BegRaww\",\"stars\":4.0,\"useful\":0,\"funny\":0,\"cool\":0,\"text\":\"I'll be the first to admit that I was not excited about going to La Tavolta. Being a food snob, when a group of friends suggested we go for dinner I looked online at the menu and to me there was nothing special and it seemed overpriced. Im also not big on ordering pasta when I go out. Alas, I was outnumbered. Thank goodness! I ordered the sea bass special. It was to die for. Cooked perfectly, seasoned perfectly, perfect portion. I can not say enough good things about this dish. When the server asked how it was he seemed very proud of the dish and said, \\\" doesn't she (the chef) do an incredible job?\\\" She does. \\n\\nMy hubby got the crab tortellini and also loved his. I heard \\\"mmmm this is so good\\\" from all around the table. Our waiter was super nice and even gave us free desserts because we were some of the last people in the restaurant. Service was very slow and the place was PACKED but we had our jugs of wine and a large group with good conversation so it didn't seem to bother anyone.\\n\\nSo-\\n\\nDo order the calamari and fried zucchini appetizers. Leave out the mussels. \\n\\nIf they have the sea bass special, I highly recommend it. The chicken parm and crab tortellini were also very good and very big. The chicken Romano was a bit bland. The house salads were teeny. \\n\\nDo make a reservation but still expect to wait for your food. Go with a large group of people and plan for it to be loud. Don't go with a date unless you're fighting and don't feel like hearing anything they have to say. Ask to sit in the side room if it's available.\",\"date\":\"2013-01-20 13:25:59\"}\n" | |||||
"{\"review_id\":\"G7XHMxG0bx9oBJNECG4IFg\",\"user_id\":\"jlu4CztcSxrKx56ba1a5AQ\",\"business_id\":\"3fw2X5bZYeW9xCz_zGhOHg\",\"stars\":3.0,\"useful\":5,\"funny\":4,\"cool\":5,\"text\":\"Tracy dessert had a big name in Hong Kong and the one in First Markham place has been here for many years now! \\n\\nCame in for some Chinese dessert, and I must say their selection has increased tremendously over the years. I might as well add that the price has also increased tremendously as well. The waitress gave us tea, which I could taste had red date in it. Fancy!\\n\\nA simple taro with coconut with tapioca pearls was like $5.25 or something. Basically all the desserts were more than $5. That's crazy! I can literally just make this dessert at home and for a bowl, it would probably cost like $0.50. A few years ago, I think I can still get it for like $3-$4, which is more reasonable, but wow, more than $5 is a little over the top for this dessert. Though I must say, it is Tracy Dessert, and they are a little more on the expensive side. \\n\\nI also saw other items on the menu like fish balls, chicken wings, shaved ice. My friend got a mango drink with fresh mango in it! \\n\\nI'm also surprised how many people come to Tracy Dessert after work. We came on a Sunday and the tables were always filled. I think the amount of tables they had were just perfect because no one really waited for seats for a long time, but the tables kept filling up once a table was finished.\",\"date\":\"2016-05-07 01:21:02\"}\n" | |||||
"{\"review_id\":\"8e9HxxLjjqc9ez5ezzN7iQ\",\"user_id\":\"d6xvYpyzcfbF_AZ8vMB7QA\",\"business_id\":\"zvO-PJCpNk4fgAVUnExYAA\",\"stars\":1.0,\"useful\":3,\"funny\":1,\"cool\":1,\"text\":\"This place has gone down hill. Clearly they have cut back on staff and food quality\\n\\nMany of the reviews were written before the menu changed. I've been going for years and the food quality has gone down hill.\\n\\nThe service is slow & my salad, which was $15, was as bad as it gets.\\n\\nIt's just not worth spending the money on this place when there are so many other options.\",\"date\":\"2010-10-05 19:12:35\"}\n" | |||||
"{\"review_id\":\"qrffudO73zsslZbe8B9D3Q\",\"user_id\":\"sG_h0dIzTKWa3Q6fmb4u-g\",\"business_id\":\"b2jN2mm9Wf3RcrZCgfo1cg\",\"stars\":2.0,\"useful\":1,\"funny\":0,\"cool\":0,\"text\":\"I was really looking forward to visiting after having some of their beers. The \\\"Man O'War\\\" quickly became my favorite DIPA; the Rusulka Vanilla Stout is a good thick, sweet stout; and the Ironclad is a top notch IPA. \\nThe only big miss on their beers I've had is the Big Chuck Barleywine. It could probably benefit greatly with age, but at this age all there is to taste is the alcohol. \\nNonetheless, I had enough to convince me that the other beers I hadn't had from them would be top notch... and they are! \\nThe reason for the 2 stars should not reflect the quality of the brewers, they obviously know their craft well! \\nThe servers are great and friendly.... but relying on two servers to wait on 100+ customers says a lot about how inexperienced management must be. In fact, after waiting 15 mins at a dirty table I was finally able to track down someone I guessed was an employee to let them know we were even there! \\nAfter another 5+ mins, the GM finally stopped over to take our drink order. The smugness of this guy was amazing. The thought of offering a simple apology never seemed to enter into his head. \\nThis is the time a server finally stopped by to pick up the non-final check left by the party before us... who didn't seem very pleased when leaving. \\nThe toast & cheese was good, but by the time we were able to dig into their heartiest offering of food, saltines and butter may have been equally pleasing.\",\"date\":\"2015-01-18 14:04:18\"}\n" | |||||
"{\"review_id\":\"RS_GTIT6836bCaPy637kNQ\",\"user_id\":\"nMeCE5-xsdleyxYuNZ_7rA\",\"business_id\":\"oxwGyA17NL6c5t1Etg5WgQ\",\"stars\":3.0,\"useful\":1,\"funny\":0,\"cool\":1,\"text\":\"It's a giant Best Buy with 66 registers. I don't get it. What's the big deal about this place??\",\"date\":\"2012-02-29 21:52:43\"}\n" | |||||
"{\"review_id\":\"kbtscdyz6lvrtGjD1quQTg\",\"user_id\":\"FIk4lQQu1eTe2EpzQ4xhBA\",\"business_id\":\"8mIrX_LrOnAqWsB5JrOojQ\",\"stars\":4.0,\"useful\":0,\"funny\":0,\"cool\":0,\"text\":\"Like walking back in time, every Saturday morning my sister and I was in a bowling league and after we were done, we'd spend a few quarters playing the pin ball machines until our mother came to pick us up.\\n\\nMy sister was daring and play the machines hard, she was afraid of that \\\"tilt\\\" showing up and freezing the game. I, on the other hand was a bit more gentler and wanted to make sure I got my quarter's worth.\\n\\nThis place has rows and rows of machines, some are really old and some are more of a mid 80's theme. There is even a Ms pac man! It was fun to spend an afternoon playing the machines and remembering all the fun of my early teen years.\",\"date\":\"2011-11-30 02:11:15\"}\n" | |||||
"{\"review_id\":\"-I5umRTkhw15RqpKMl_o1Q\",\"user_id\":\"-mA3-1mN4JIEkqOtdbNXCQ\",\"business_id\":\"mRUVMJkUGxrByzMQ2MuOpA\",\"stars\":1.0,\"useful\":0,\"funny\":1,\"cool\":0,\"text\":\"Walked in around 4 on a Friday afternoon, we sat at a table just off the bar and walked out after 5 min or so. Don't even think they realized we walked in. However everyone at the bar noticed we walked in!!! Service was non existent at best. Not a good way for a new business to start out. Oh well, the location they are at has been about 5 different things over the past several years, so they will just be added to the list. SMDH!!!\",\"date\":\"2017-12-15 23:27:08\"}\n" | |||||
"{\"review_id\":\"Z7wgXp98wYB57QdRY3HQ3w\",\"user_id\":\"GYNnVehQeXjty0xH7-6Fhw\",\"business_id\":\"FxLfqxdYPA6Z85PFKaqLrg\",\"stars\":4.0,\"useful\":0,\"funny\":0,\"cool\":0,\"text\":\"Wow. So surprised at the one and two star reviews! We started with the most tender calamari. Although the marinara sauce was a bit bland, but a touch of salt made it just right. My husband had the veal with peppers and said it was so delicious and tender. The mashed potatoes were perfect. I had the salmon Diablo which was also delicious. Our salad was beautiful! Dressing was served on the salad and it was a nice amount. We ended our delicious meal with a piece of tiramisu. Our server Matt was right on!! Very pleasant and knowledgeable about the menu. Our appetizer, salad and entrees were timed perfectly. I love salad and did not mind that my entree was served while I was still eating it! No problem it let my dinner cool to just the right temp for me to eat it comfortably. \\nI wonder sometimes if people just don't appreciate relaxing and taking time to eat a wonderful and beautifully prepared meal. A wonderful atmosphere. So relaxing. The chairs are super comfortable too!!! We will certainly be back. \\nGive it a try. Don't always go by the reviews. \\nA bottle of Riesling, calamari app, two delicious entrees and dessert for $92! \\nWell with it.\",\"date\":\"2016-05-07 01:36:53\"}\n" | |||||
"{\"review_id\":\"qlXw1JQ0UodW7qrmVgwCXw\",\"user_id\":\"bAhqAPoWaZYcyYi7bs024Q\",\"business_id\":\"LUN6swQYa4xJKaM_UEUOEw\",\"stars\":4.0,\"useful\":0,\"funny\":0,\"cool\":0,\"text\":\"Michael from Red Carpet VIP is amazing ! I reached out because I needed help planning my soon to be sister in law's bachelorette. It was a group of 10 girls so I was a little overwhelmed but Michael saved the day! Everything was super smooth and easy! We got good deals and had the best time ever! We booked hotel and a bachelorette package for a great price. I have saved contact info because I will for sure reach out again on next Vegas trip!!!\",\"date\":\"2018-04-27 20:25:26\"}\n" | |||||
"{\"review_id\":\"JVcjMhlavKKn3UIt9p9OXA\",\"user_id\":\"TpyOT5E16YASd7EWjLQlrw\",\"business_id\":\"AakkkTuGZA2KBodKi2_u8A\",\"stars\":1.0,\"useful\":1,\"funny\":1,\"cool\":0,\"text\":\"I cannot believe how things have changed in 3 years. I picked up duck congee sometime in the winter when my hubby was sick. I was very disappointed because the ginger fish sauce tasted like it had gone bad (it should never be bitter). Today, my hubby wanted to eat there since he was craving the duck congee and most places don't serve the duck & coleslaw side. We waited about 10 minutes to get our menu. After we placed our orders, we waited another 5 minutes to get the tea that most places bring with the menu. I could go on with the details but the gist of the story is they were understaffed or the staff was slow. The worst part of it was that the service. The servers make us feel bad for asking for anything (like when they took our order). We had arrived and placed our order before another couple bside us at least 10 minutes ahead but somehow, this couple received their pho before mine. They were almost done eating their pho before mine came out.\",\"date\":\"2012-07-16 00:37:14\"}\n" | |||||
"{\"review_id\":\"svK3nBU7Rk8VfGorlrN52A\",\"user_id\":\"NJlxGtouq06hhC7sS2ECYw\",\"business_id\":\"YvrylyuWgbP90RgMqZQVnQ\",\"stars\":5.0,\"useful\":0,\"funny\":0,\"cool\":0,\"text\":\"You can't really find anything wrong with this place, the pastas and pizzas are both amazing and high quality, the price is very reasonable, the owner and the staff are very friendly, if you're in downtown check this place out, a lot of people think just because it's downtown there are lots of options around but that's not always the case as there is also a lot of poor quality food in downtown as well.\",\"date\":\"2017-04-07 21:27:49\"}\n" | |||||
"{\"review_id\":\"1wVA2-vQIuW_ClmXkDxqMQ\",\"user_id\":\"86J5DwcFk4f4In1Vxe2TvA\",\"business_id\":\"NyLYY8q1-H3hfsTwuwLPCg\",\"stars\":4.0,\"useful\":0,\"funny\":0,\"cool\":0,\"text\":\"Great lunch today. Staff was very helpful in assisting with selections and knowledgeable on the ingredients. We enjoyed the BBQ chicken with tika masala sauce and really good naan bread. The biryani with chicken was also yummy! Fun to see the food being prepared in the tandoori ovens. Great addition to the fast casual scene in Cleveland.\",\"date\":\"2015-01-03 22:47:34\"}\n" | |||||
"{\"review_id\":\"6BnQwlxRn7ZuWdzninM9sQ\",\"user_id\":\"JSrP-dUmLlwZiI7Dp3PQ2A\",\"business_id\":\"cHdJXLlKNWixBXpDwEGb_A\",\"stars\":3.0,\"useful\":1,\"funny\":7,\"cool\":1,\"text\":\"I love chinese food and I love mexican food. What can go wrong? A couple of things. First things first, this place is more of a \\\"rice bowl\\\" kind of place. I thought it was going to be more diverse as far as the menu goes, but its mainly rice bowls you get with different kinds of meats. The ordering was a little confusing at first, but one of the employees helped us out and I got the 2-item bowl and got the jade chicken and hengrenade chicken with all rice(jerk). I also ordered a jade chicken quesadilla on the side.\\n\\nI'm gonna admit, this place looks kinda dirty. I don't think Arizona uses those health department letter grade system like California does, but if I were to just judge by how it looked inside, i'd give it a \\\"C\\\" grade lol. We waited for about 15 minutes or so and finally got our food. We took it to go and ate at our hotel room. \\n\\nMmmm... the food was just alright. The jade chicken was nothing special. It tasted like any generic chinese fast food orange chicken\\/sesame chicken variant. The hengrenade chicken, although was the less spicier version of the jerk chicken, was still pretty spicy for me. Just be warned the jerk chicken is super spicy. If you aren't sure, ask for a sample at the restaurant before ordering, but it was way too spicy for me. \\n\\nThe jade chicken quesadilla was decent, but nothing special. Just imagine orange chicken in between a tortilla and cheese. A friend of mine ordered a jade chicken burrito and we were confused when we pulled it out of the bag because it was literally the size of Mcdonald's apple pie. If you order the burrito, be warned that it's a burrito for gnomes and smurfs, but he said it was tasty. \\n\\nThey provide a snicker doodle sugar cookie for each meal and it was decent, again nothing special. \\n\\nNot gonna lie, the next day my stomach felt like a little mexican dude and chinese dude were wrestling and throwing molotov cocktails inside. I used the bathroom like 5 times. I don't recommend eating this place if you have a lot to do the next day.\",\"date\":\"2015-04-01 16:30:00\"}\n" | |||||
"{\"review_id\":\"rEITo90tpyKmEfNDp3Ou3A\",\"user_id\":\"6Fz_nus_OG4gar721OKgZA\",\"business_id\":\"6lj2BJ4tJeu7db5asGHQ4w\",\"stars\":5.0,\"useful\":0,\"funny\":0,\"cool\":0,\"text\":\"We've been a huge Slim's fan since they opened one up in Texas about two years ago when we used to live there. This place never disappoints. They even have great salads and grilled chicken. Plus they have fresh brewed sweet tea, it's the best!\",\"date\":\"2017-05-26 01:23:19\"}\n" | |||||
"{\"review_id\":\"4bUyL7lzoWzDZaJETAKREg\",\"user_id\":\"_N7Ndn29bpll_961oPeEfw\",\"business_id\":\"y-Iw6dZflNix4BdwIyTNGA\",\"stars\":3.0,\"useful\":0,\"funny\":0,\"cool\":0,\"text\":\"Good selection of classes of beers and mains. I've been here twice.\\n\\nFirst time I had the fried chicken. It was delicious, but be warned, extremely salty. I couldn't even finish the last piece of chicken after experiencing a salt overload.\\n\\nSecond time we came on a wednesday. We didn't know it was BBQ night, where they have a completely different menu, and don't offer anything from their original vegetarian-friendly menu. This menu has one vegetarian-friendly option - an eggplant sandwich. The vegetarian in my party said it was awful. Also, on BBQ night you choose 2 sides. Except they were out of all their sides except 2 - fries and potato salad. I can't say I was thrilled to have carb heavy sides with my carb heavy main. How do you run out of sides so early in the evening?\\n\\nService not so great.\\n\\nI'd avoid coming here on wednesdays.\",\"date\":\"2014-06-27 21:19:23\"}\n" |
@@ -0,0 +1,7 @@ | |||||
import unittest | |||||
from reproduction.text_classification.data.yelpLoader import yelpLoader | |||||
class TestDatasetLoader(unittest.TestCase): | |||||
def test_yelpLoader(self): | |||||
ds = yelpLoader().load('sample_yelp.json') | |||||
assert len(ds) == 20 |
@@ -0,0 +1,51 @@ | |||||
import os | |||||
from typing import Union, Dict | |||||
def check_dataloader_paths(paths:Union[str, Dict[str, str]])->Dict[str, str]: | |||||
""" | |||||
Check that the file path(s) passed to a dataloader are valid. If they are, return a dict that contains at least the key 'train', similar to the result below: | |||||
{ | |||||
'train': '/some/path/to/', # always present; the vocabulary should be built on this split, the remaining files only need to be processed and indexed. | |||||
'test': 'xxx' # may or may not be present | |||||
... | |||||
} | |||||
If paths is not valid, the corresponding error is raised directly. | |||||
:param paths: path(s) to the data | |||||
:return: | |||||
""" | |||||
if isinstance(paths, str): | |||||
if os.path.isfile(paths): | |||||
return {'train': paths} | |||||
elif os.path.isdir(paths): | |||||
train_fp = os.path.join(paths, 'train.txt') | |||||
if not os.path.isfile(train_fp): | |||||
raise FileNotFoundError(f"train.txt is not found in folder {paths}.") | |||||
files = {'train': train_fp} | |||||
for filename in ['test.txt', 'dev.txt']: | |||||
fp = os.path.join(paths, filename) | |||||
if os.path.isfile(fp): | |||||
files[filename.split('.')[0]] = fp | |||||
return files | |||||
else: | |||||
raise FileNotFoundError(f"{paths} is not a valid file path.") | |||||
elif isinstance(paths, dict): | |||||
if paths: | |||||
if 'train' not in paths: | |||||
raise KeyError("You have to include `train` in your dict.") | |||||
for key, value in paths.items(): | |||||
if isinstance(key, str) and isinstance(value, str): | |||||
if not os.path.isfile(value): | |||||
raise FileNotFoundError(f"{value} is not a valid file.") | |||||
else: | |||||
raise TypeError("All keys and values in paths should be str.") | |||||
return paths | |||||
else: | |||||
raise ValueError("Empty paths is not allowed.") | |||||
else: | |||||
raise TypeError(f"paths only supports str and dict. not {type(paths)}.") | |||||
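A quick sketch of the three accepted argument forms (the paths below are illustrative; the files must actually exist for the calls to succeed):

from reproduction.utils import check_dataloader_paths

# a single file is wrapped as {'train': path}
paths = check_dataloader_paths('data/train.txt')

# a directory must contain train.txt; dev.txt / test.txt are picked up when present
paths = check_dataloader_paths('data/')

# a dict must contain 'train', and every value must point to an existing file
paths = check_dataloader_paths({'train': 'data/train.txt', 'dev': 'data/dev.txt'})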
@@ -12,6 +12,7 @@ from fastNLP import AccuracyMetric | |||||
from fastNLP import SGD | from fastNLP import SGD | ||||
from fastNLP import Trainer | from fastNLP import Trainer | ||||
from fastNLP.models.base_model import NaiveClassifier | from fastNLP.models.base_model import NaiveClassifier | ||||
from fastNLP.core.callback import EarlyStopError | |||||
def prepare_env(): | def prepare_env(): | ||||
@@ -1,8 +1,55 @@ | |||||
import unittest | import unittest | ||||
import numpy as np | import numpy as np | ||||
import torch | |||||
from fastNLP import FieldArray | from fastNLP import FieldArray | ||||
from fastNLP.core.field import _get_ele_type_and_dim | |||||
from fastNLP import AutoPadder | |||||
class TestFieldArrayTypeDimDetect(unittest.TestCase): | |||||
""" | |||||
Check that FieldArray correctly detects the element type and ndim | |||||
""" | |||||
def test_case1(self): | |||||
# 1.1 regular scalar types | |||||
for value in [1, True, 1.0, 'abc']: | |||||
type_ = type(value) | |||||
_type, _dim = _get_ele_type_and_dim(cell=value) | |||||
self.assertListEqual([_type, _dim], [type_, 0]) | |||||
# 1.2 mixed types should raise an error | |||||
with self.assertRaises(Exception): | |||||
value = [1, 2, 1.0] | |||||
_get_ele_type_and_dim(value) | |||||
# tests with numpy inputs | |||||
# 2.1 | |||||
value = np.array([1, 2, 3]) | |||||
type_ = value.dtype | |||||
dim_ = 1 | |||||
self.assertSequenceEqual(_get_ele_type_and_dim(cell=value), [type_, dim_]) | |||||
# 2.2 | |||||
value = np.array([[1, 2], [3, 4, 5]]) # the char-embedding case (rows of unequal length) | |||||
self.assertSequenceEqual([int, 2], _get_ele_type_and_dim(value)) | |||||
# 2.3 | |||||
value = np.zeros((3, 4)) | |||||
self.assertSequenceEqual([value.dtype, 2], _get_ele_type_and_dim(value)) | |||||
# 2.4 inconsistent dimensions should raise | |||||
with self.assertRaises(Exception): | |||||
value = np.array([[1, 2], [3, [1]]]) | |||||
_get_ele_type_and_dim(value) | |||||
# 2.5 mixed types should raise | |||||
with self.assertRaises(Exception): | |||||
value = np.array([[1, 2], [3.0]]) | |||||
_get_ele_type_and_dim(value) | |||||
# tests with torch tensors | |||||
# 3.1 the word-embedding case | |||||
value = torch.zeros(3, 10) | |||||
self.assertSequenceEqual([value.dtype, 2], _get_ele_type_and_dim(value)) | |||||
# 3.2 the char-embedding / image case | |||||
value = torch.zeros(3, 32, 32) | |||||
self.assertSequenceEqual([value.dtype, 3], _get_ele_type_and_dim(value)) | |||||
class TestFieldArrayInit(unittest.TestCase): | class TestFieldArrayInit(unittest.TestCase): | ||||
@@ -31,12 +78,6 @@ class TestFieldArrayInit(unittest.TestCase): | |||||
# 三维list | # 三维list | ||||
fa = FieldArray("x", [[[1, 2], [3, 4]], [[1, 2], [3, 4]]], is_input=True) | fa = FieldArray("x", [[[1, 2], [3, 4]], [[1, 2], [3, 4]]], is_input=True) | ||||
def test_init_v7(self): | |||||
# list of array | |||||
fa = FieldArray("x", [np.array([[1, 2], [3, 4]]), np.array([[1, 2], [3, 4]])], is_input=True) | |||||
self.assertEqual(fa.pytype, int) | |||||
self.assertEqual(fa.dtype, np.int) | |||||
def test_init_v4(self): | def test_init_v4(self): | ||||
# 一维list | # 一维list | ||||
val = [1, 2, 3, 4] | val = [1, 2, 3, 4] | ||||
@@ -56,6 +97,11 @@ class TestFieldArrayInit(unittest.TestCase): | |||||
fa.append(val) | fa.append(val) | ||||
def test_init_v7(self): | def test_init_v7(self): | ||||
# list of array | |||||
fa = FieldArray("x", [np.array([[1, 2], [3, 4]]), np.array([[1, 2], [3, 4]])], is_input=True) | |||||
self.assertEqual(fa.dtype, np.array([1]).dtype) | |||||
def test_init_v8(self): | |||||
# 二维list | # 二维list | ||||
val = np.array([[1, 2], [3, 4]]) | val = np.array([[1, 2], [3, 4]]) | ||||
fa = FieldArray("x", [val], is_input=True) | fa = FieldArray("x", [val], is_input=True) | ||||
@@ -79,33 +125,23 @@ class TestFieldArray(unittest.TestCase): | |||||
self.assertListEqual(list(fa.get([0, 1, 2])), [1, 2, 3]) | self.assertListEqual(list(fa.get([0, 1, 2])), [1, 2, 3]) | ||||
def test_type_conversion(self): | def test_type_conversion(self): | ||||
fa = FieldArray("x", [1.2, 2.2, 3, 4, 5], is_input=True) | |||||
self.assertEqual(fa.pytype, float) | |||||
self.assertEqual(fa.dtype, np.float64) | |||||
fa = FieldArray("x", [1, 2, 3, 4, 5], is_input=True) | fa = FieldArray("x", [1, 2, 3, 4, 5], is_input=True) | ||||
fa.append(1.3333) | |||||
self.assertEqual(fa.pytype, float) | |||||
self.assertEqual(fa.dtype, np.float64) | |||||
self.assertEqual(fa.dtype, int) | |||||
fa = FieldArray("y", [1.1, 2.2, 3.3, 4.4, 5.5], is_input=True) | fa = FieldArray("y", [1.1, 2.2, 3.3, 4.4, 5.5], is_input=True) | ||||
fa.append(10) | |||||
self.assertEqual(fa.pytype, float) | |||||
self.assertEqual(fa.dtype, np.float64) | |||||
fa.append(10.0) | |||||
self.assertEqual(fa.dtype, float) | |||||
fa = FieldArray("y", ["a", "b", "c", "d"], is_input=True) | fa = FieldArray("y", ["a", "b", "c", "d"], is_input=True) | ||||
fa.append("e") | fa.append("e") | ||||
self.assertEqual(fa.dtype, np.str) | |||||
self.assertEqual(fa.pytype, str) | |||||
self.assertEqual(fa.dtype, str) | |||||
def test_support_np_array(self): | def test_support_np_array(self): | ||||
fa = FieldArray("y", np.array([[1.1, 2.2, 3.3, 4.4, 5.5]]), is_input=True) | fa = FieldArray("y", np.array([[1.1, 2.2, 3.3, 4.4, 5.5]]), is_input=True) | ||||
self.assertEqual(fa.dtype, np.float64) | self.assertEqual(fa.dtype, np.float64) | ||||
self.assertEqual(fa.pytype, float) | |||||
fa.append(np.array([1.1, 2.2, 3.3, 4.4, 5.5])) | fa.append(np.array([1.1, 2.2, 3.3, 4.4, 5.5])) | ||||
self.assertEqual(fa.dtype, np.float64) | self.assertEqual(fa.dtype, np.float64) | ||||
self.assertEqual(fa.pytype, float) | |||||
fa = FieldArray("my_field", np.random.rand(3, 5), is_input=True) | fa = FieldArray("my_field", np.random.rand(3, 5), is_input=True) | ||||
# in this case, pytype is actually a float. We do not care about it. | # in this case, pytype is actually a float. We do not care about it. | ||||
@@ -113,11 +149,10 @@ class TestFieldArray(unittest.TestCase): | |||||
def test_nested_list(self): | def test_nested_list(self): | ||||
fa = FieldArray("y", [[1.1, 2.2, 3.3, 4.4, 5.5], [1.1, 2.2, 3.3, 4.4, 5.5]], is_input=True) | fa = FieldArray("y", [[1.1, 2.2, 3.3, 4.4, 5.5], [1.1, 2.2, 3.3, 4.4, 5.5]], is_input=True) | ||||
self.assertEqual(fa.pytype, float) | |||||
self.assertEqual(fa.dtype, np.float64) | |||||
self.assertEqual(fa.dtype, float) | |||||
def test_getitem_v1(self): | def test_getitem_v1(self): | ||||
fa = FieldArray("y", [[1.1, 2.2, 3.3, 4.4, 5.5], [1, 2, 3, 4, 5]], is_input=True) | |||||
fa = FieldArray("y", [[1.1, 2.2, 3.3, 4.4, 5.5], [1.0, 2.0, 3.0, 4.0, 5.0]], is_input=True) | |||||
self.assertEqual(fa[0], [1.1, 2.2, 3.3, 4.4, 5.5]) | self.assertEqual(fa[0], [1.1, 2.2, 3.3, 4.4, 5.5]) | ||||
ans = fa[[0, 1]] | ans = fa[[0, 1]] | ||||
self.assertTrue(isinstance(ans, np.ndarray)) | self.assertTrue(isinstance(ans, np.ndarray)) | ||||
@@ -150,7 +185,7 @@ class TestFieldArray(unittest.TestCase): | |||||
fa = FieldArray("y", [[1.1, 2.2, 3.3, 4.4, 5.5], [1, 2, 3, 4, 5]], is_input=True) | fa = FieldArray("y", [[1.1, 2.2, 3.3, 4.4, 5.5], [1, 2, 3, 4, 5]], is_input=True) | ||||
fa.append(["str", 0, 0, 0, 1.89]) | fa.append(["str", 0, 0, 0, 1.89]) | ||||
fa = FieldArray("y", [[1.1, 2.2, 3.3, 4.4, 5.5], [1, 2, 3, 4, 5]], is_input=True) | |||||
fa = FieldArray("y", [[1.1, 2.2, 3.3, 4.4, 5.5], [1.0, 2.0, 3.0, 4.0, 5.0]], is_input=True) | |||||
fa.append([1.2, 2.3, 3.4, 4.5, 5.6]) | fa.append([1.2, 2.3, 3.4, 4.5, 5.6]) | ||||
self.assertEqual(len(fa), 3) | self.assertEqual(len(fa), 3) | ||||
self.assertEqual(fa[2], [1.2, 2.3, 3.4, 4.5, 5.6]) | self.assertEqual(fa[2], [1.2, 2.3, 3.4, 4.5, 5.6]) | ||||
@@ -163,33 +198,86 @@ class TestFieldArray(unittest.TestCase): | |||||
fa = FieldArray("y", [(1, "1"), (2, "2"), (3, "3"), (4, "4")], is_target=True, ignore_type=True) | fa = FieldArray("y", [(1, "1"), (2, "2"), (3, "3"), (4, "4")], is_target=True, ignore_type=True) | ||||
class TestPadder(unittest.TestCase): | |||||
class TestAutoPadder(unittest.TestCase): | |||||
def test00(self): | |||||
padder = AutoPadder() | |||||
# when no dtype is known | |||||
contents = [(1, 2), ('str', 'a')] | |||||
padder(contents, None, None, None) | |||||
def test01(self): | def test01(self): | ||||
""" | |||||
测试AutoPadder能否正常工作 | |||||
:return: | |||||
""" | |||||
from fastNLP import AutoPadder | |||||
# test multi-dimensional bool, int, str and float contents | |||||
# str | |||||
padder = AutoPadder() | padder = AutoPadder() | ||||
content = ['This is a str', 'this is another str'] | content = ['This is a str', 'this is another str'] | ||||
self.assertListEqual(content, padder(content, None, np.str).tolist()) | |||||
self.assertListEqual(content, padder(content, None, str, 0).tolist()) | |||||
content = [1, 2] | |||||
self.assertListEqual(content, padder(content, None, np.int64).tolist()) | |||||
content = [[1,2], [3], [4]] | |||||
self.assertListEqual([[1,2], [3, 0], [4, 0]], | |||||
padder(content, None, np.int64).tolist()) | |||||
# 1-dim int | |||||
content = [[1, 2, 3], [4,], [5, 6, 7, 8]] | |||||
padded_content = [[1, 2, 3, 0], [4, 0, 0, 0], [5, 6, 7, 8]] | |||||
self.assertListEqual(padder(content, None, int, 1).tolist(), padded_content) | |||||
# 2-dim int | |||||
padded_content = [[[1, 2, 3, 0], [4, 5, 0, 0], [7, 8, 9, 10]], [[1, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]] | |||||
content = [ | content = [ | ||||
[[1, 2, 3], [4, 5], [7,8,9,10]], | |||||
[[1]] | |||||
] | |||||
self.assertListEqual(content, | |||||
padder(content, None, np.int64).tolist()) | |||||
[[1, 2, 3], [4, 5], [7, 8, 9, 10]], | |||||
[[1]] | |||||
] | |||||
self.assertListEqual(padder(content, None, int, 2).tolist(), padded_content) | |||||
# 3-dim images | |||||
contents = [np.random.rand(3, 4, 4).tolist() for _ in range(5)] | |||||
self.assertTrue(padder(contents, None, float, 3).shape==(5, 3, 4, 4)) | |||||
# contents with even higher dimensionality are returned as-is | |||||
contents = [np.random.rand(24, 3, 4, 4).tolist() for _ in range(5)] | |||||
self.assertTrue(isinstance(padder(contents, None, float, 4), np.ndarray)) | |||||
def test02(self): | def test02(self): | ||||
padder = AutoPadder() | |||||
# numpy inputs | |||||
# 0-dim | |||||
contents = np.arange(12) | |||||
self.assertListEqual(padder(contents, None, contents.dtype, 0).tolist(), contents.tolist()) | |||||
# 1-dim | |||||
contents = np.arange(12).reshape((3, 4)) | |||||
self.assertListEqual(padder(contents, None, contents.dtype, 1).tolist(), contents.tolist()) | |||||
# 2-dim | |||||
contents = np.ones((3, 10, 5)) | |||||
self.assertListEqual(padder(contents, None, contents.dtype, 2).tolist(), contents.tolist()) | |||||
# 3-dim | |||||
contents = [np.random.rand(3, 4, 4) for _ in range(5)] | |||||
l_contents = [content.tolist() for content in contents] | |||||
self.assertListEqual(padder(contents, None, contents[0].dtype, 3).tolist(), l_contents) | |||||
def test03(self): | |||||
padder = AutoPadder() | |||||
# torch tensor inputs | |||||
# 0-dim | |||||
contents = torch.arange(12) | |||||
r_contents = padder(contents, None, contents.dtype, 0) | |||||
self.assertSequenceEqual(r_contents.tolist(), contents.tolist()) | |||||
self.assertTrue(r_contents.dtype==contents.dtype) | |||||
# 0-dim | |||||
contents = [torch.tensor(1) for _ in range(10)] | |||||
self.assertSequenceEqual(padder(contents, None, torch.int64, 0).tolist(), contents) | |||||
# 1-dim | |||||
contents = torch.randn(3, 4) | |||||
padder(contents, None, torch.float64, 1) | |||||
# 3-dim | |||||
contents = [torch.randn(3, 4, 4) for _ in range(5)] | |||||
padder(contents, None, torch.float64, 3) | |||||
class TestEngChar2DPadder(unittest.TestCase): | |||||
def test01(self): | |||||
""" | """ | ||||
测试EngChar2DPadder能不能正确使用 | 测试EngChar2DPadder能不能正确使用 | ||||
:return: | :return: | ||||
@@ -198,38 +286,31 @@ class TestPadder(unittest.TestCase): | |||||
padder = EngChar2DPadder(pad_length=0) | padder = EngChar2DPadder(pad_length=0) | ||||
contents = [1, 2] | contents = [1, 2] | ||||
# 不能是1维 | |||||
with self.assertRaises(ValueError): | |||||
padder(contents, None, np.int64) | |||||
# 0-dim contents are not allowed | |||||
with self.assertRaises(Exception): | |||||
padder(contents, None, np.int64, 0) | |||||
contents = [[1, 2]] | contents = [[1, 2]] | ||||
# 不能是2维 | |||||
with self.assertRaises(ValueError): | |||||
padder(contents, None, np.int64) | |||||
contents = [[[[1, 2]]]] | |||||
# 1-dim contents are not allowed | |||||
with self.assertRaises(Exception): | |||||
padder(contents, None, np.int64, 1) | |||||
contents = [ | |||||
[[[[1, 2]]]] | |||||
] | |||||
# 不能是3维以上 | # 不能是3维以上 | ||||
with self.assertRaises(ValueError): | |||||
padder(contents, None, np.int64) | |||||
with self.assertRaises(Exception): | |||||
padder(contents, None, np.int64, 3) | |||||
contents = [ | contents = [ | ||||
[[1, 2, 3], [4, 5], [7,8,9,10]], | [[1, 2, 3], [4, 5], [7,8,9,10]], | ||||
[[1]] | [[1]] | ||||
] | ] | ||||
self.assertListEqual([[[1, 2, 3, 0], [4, 5, 0, 0], [7, 8, 9, 10]], [[1, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]], | self.assertListEqual([[[1, 2, 3, 0], [4, 5, 0, 0], [7, 8, 9, 10]], [[1, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]], | ||||
padder(contents, None, np.int64).tolist()) | |||||
padder(contents, None, np.int64, 2).tolist()) | |||||
padder = EngChar2DPadder(pad_length=5, pad_val=-100) | padder = EngChar2DPadder(pad_length=5, pad_val=-100) | ||||
self.assertListEqual( | self.assertListEqual( | ||||
[[[1, 2, 3, -100, -100], [4, 5, -100, -100, -100], [7, 8, 9, 10, -100]], | [[[1, 2, 3, -100, -100], [4, 5, -100, -100, -100], [7, 8, 9, 10, -100]], | ||||
[[1, -100, -100, -100, -100], [-100, -100, -100, -100, -100], [-100, -100, -100, -100, -100]]], | [[1, -100, -100, -100, -100], [-100, -100, -100, -100, -100], [-100, -100, -100, -100, -100]]], | ||||
padder(contents, None, np.int64).tolist() | |||||
padder(contents, None, np.int64, 2).tolist() | |||||
) | ) | ||||
def test_None_dtype(self): | |||||
from fastNLP import AutoPadder | |||||
padder = AutoPadder() | |||||
content = [ | |||||
[[1, 2, 3], [4, 5], [7, 8, 9, 10]], | |||||
[[1]] | |||||
] | |||||
ans = padder(content, None, None).tolist() | |||||
self.assertListEqual(content, ans) |
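The tests above all use the new four-argument padder call (contents, field_name, field_ele_dtype, dim). As a small illustration of the expected behaviour on a ragged 1-dim int field (the field name and values below are chosen arbitrarily):

from fastNLP import AutoPadder

padder = AutoPadder()
contents = [[1, 2, 3], [4], [5, 6]]
padded = padder(contents, 'words', int, 1)  # (contents, field_name, field_ele_dtype, dim)
print(padded.tolist())  # expected: [[1, 2, 3], [4, 0, 0], [5, 6, 0]]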
@@ -18,7 +18,7 @@ class Model(nn.Module): | |||||
self.param = nn.Parameter(torch.zeros(0)) | self.param = nn.Parameter(torch.zeros(0)) | ||||
class TestMoveModelDeivce(unittest.TestCase): | |||||
class TestMoveModelDevice(unittest.TestCase): | |||||
def test_case1(self): | def test_case1(self): | ||||
# 测试str | # 测试str | ||||
model = Model() | model = Model() | ||||
@@ -1,6 +1,8 @@ | |||||
import unittest | import unittest | ||||
import os | |||||
from fastNLP.io import Conll2003Loader, PeopleDailyCorpusLoader, CSVLoader, SNLILoader, JsonLoader | from fastNLP.io import Conll2003Loader, PeopleDailyCorpusLoader, CSVLoader, SNLILoader, JsonLoader | ||||
from fastNLP.io.dataset_loader import SSTLoader | |||||
from reproduction.text_classification.data.yelpLoader import yelpLoader | |||||
class TestDatasetLoader(unittest.TestCase): | class TestDatasetLoader(unittest.TestCase): | ||||
@@ -28,3 +30,34 @@ class TestDatasetLoader(unittest.TestCase): | |||||
def test_JsonLoader(self): | def test_JsonLoader(self): | ||||
ds = JsonLoader().load('test/data_for_tests/sample_snli.jsonl') | ds = JsonLoader().load('test/data_for_tests/sample_snli.jsonl') | ||||
assert len(ds) == 3 | assert len(ds) == 3 | ||||
def test_SST(self): | |||||
train_data = """(3 (2 (2 The) (2 Rock)) (4 (3 (2 is) (4 (2 destined) (2 (2 (2 (2 (2 to) (2 (2 be) (2 (2 the) (2 (2 21st) (2 (2 (2 Century) (2 's)) (2 (3 new) (2 (2 ``) (2 Conan)))))))) (2 '')) (2 and)) (3 (2 that) (3 (2 he) (3 (2 's) (3 (2 going) (3 (2 to) (4 (3 (2 make) (3 (3 (2 a) (3 splash)) (2 (2 even) (3 greater)))) (2 (2 than) (2 (2 (2 (2 (1 (2 Arnold) (2 Schwarzenegger)) (2 ,)) (2 (2 Jean-Claud) (2 (2 Van) (2 Damme)))) (2 or)) (2 (2 Steven) (2 Segal))))))))))))) (2 .))) | |||||
(4 (4 (4 (2 The) (4 (3 gorgeously) (3 (2 elaborate) (2 continuation)))) (2 (2 (2 of) (2 ``)) (2 (2 The) (2 (2 (2 Lord) (2 (2 of) (2 (2 the) (2 Rings)))) (2 (2 '') (2 trilogy)))))) (2 (3 (2 (2 is) (2 (2 so) (2 huge))) (2 (2 that) (3 (2 (2 (2 a) (2 column)) (2 (2 of) (2 words))) (2 (2 (2 (2 can) (1 not)) (3 adequately)) (2 (2 describe) (2 (3 (2 (2 co-writer\/director) (2 (2 Peter) (3 (2 Jackson) (2 's)))) (3 (2 expanded) (2 vision))) (2 (2 of) (2 (2 (2 J.R.R.) (2 (2 Tolkien) (2 's))) (2 Middle-earth))))))))) (2 .))) | |||||
(3 (3 (2 (2 (2 (2 (2 Singer\/composer) (2 (2 Bryan) (2 Adams))) (2 (2 contributes) (2 (2 (2 a) (2 slew)) (2 (2 of) (2 songs))))) (2 (2 --) (2 (2 (2 (2 a) (2 (2 few) (3 potential))) (2 (2 (2 hits) (2 ,)) (2 (2 (2 a) (2 few)) (1 (1 (2 more) (1 (2 simply) (2 intrusive))) (2 (2 to) (2 (2 the) (2 story))))))) (2 --)))) (2 but)) (3 (4 (2 the) (3 (2 whole) (2 package))) (2 (3 certainly) (3 (2 captures) (2 (1 (2 the) (2 (2 (2 intended) (2 (2 ,) (2 (2 er) (2 ,)))) (3 spirit))) (2 (2 of) (2 (2 the) (2 piece)))))))) (2 .)) | |||||
(2 (2 (2 You) (2 (2 'd) (2 (2 think) (2 (2 by) (2 now))))) (2 (2 America) (2 (2 (2 would) (1 (2 have) (2 (2 (2 had) (1 (2 enough) (2 (2 of) (2 (2 plucky) (2 (2 British) (1 eccentrics)))))) (4 (2 with) (4 (3 hearts) (3 (2 of) (3 gold))))))) (2 .)))) | |||||
""" | |||||
test_data = """(3 (2 Yet) (3 (2 (2 the) (2 act)) (3 (4 (3 (2 is) (3 (2 still) (4 charming))) (2 here)) (2 .)))) | |||||
(4 (2 (2 Whether) (2 (2 (2 (2 or) (1 not)) (3 (2 you) (2 (2 're) (3 (3 enlightened) (2 (2 by) (2 (2 any) (2 (2 of) (2 (2 Derrida) (2 's))))))))) (2 (2 lectures) (2 (2 on) (2 (2 ``) (2 (2 (2 (2 (2 (2 the) (2 other)) (2 '')) (2 and)) (2 ``)) (2 (2 the) (2 self)))))))) (3 (2 ,) (3 (2 '') (3 (2 Derrida) (3 (3 (2 is) (4 (2 an) (4 (4 (2 undeniably) (3 (4 (3 fascinating) (2 and)) (4 playful))) (2 fellow)))) (2 .)))))) | |||||
(4 (3 (2 (2 Just) (2 (2 the) (2 labour))) (3 (2 involved) (3 (2 in) (4 (2 creating) (3 (3 (2 the) (3 (3 layered) (2 richness))) (3 (2 of) (3 (2 (2 the) (2 imagery)) (2 (2 in) (3 (2 (2 this) (2 chiaroscuro)) (2 (2 of) (2 (2 (2 madness) (2 and)) (2 light)))))))))))) (3 (3 (2 is) (4 astonishing)) (2 .))) | |||||
(3 (3 (2 Part) (3 (2 of) (4 (2 (2 the) (3 charm)) (2 (2 of) (2 (2 Satin) (2 Rouge)))))) (3 (3 (2 is) (3 (2 that) (3 (2 it) (2 (1 (2 avoids) (2 (2 the) (1 obvious))) (3 (2 with) (3 (3 (3 humour) (2 and)) (2 lightness))))))) (2 .))) | |||||
(4 (2 (2 a) (2 (2 screenplay) (2 more))) (3 (4 ingeniously) (2 (2 constructed) (2 (2 (2 (2 than) (2 ``)) (2 Memento)) (2 ''))))) | |||||
(3 (2 ``) (3 (2 (2 Extreme) (2 Ops)) (3 (2 '') (4 (4 (3 exceeds) (2 expectations)) (2 .))))) | |||||
""" | |||||
train, test = 'train--', 'test--' | |||||
with open(train, 'w', encoding='utf-8') as f: | |||||
f.write(train_data) | |||||
with open(test, 'w', encoding='utf-8') as f: | |||||
f.write(test_data) | |||||
loader = SSTLoader() | |||||
info = loader.process( | |||||
{train: train, test: test}, | |||||
train_ds=[train], | |||||
src_vocab_op=dict(min_freq=2) | |||||
) | |||||
assert len(list(info.vocabs.items())) == 2 | |||||
assert len(list(info.datasets.items())) == 2 | |||||
print(info.vocabs) | |||||
print(info.datasets) | |||||
os.remove(train), os.remove(test) |
@@ -2,20 +2,64 @@ import unittest | |||||
import torch | import torch | ||||
from fastNLP.models.bert import BertModel | |||||
from fastNLP.models.bert import * | |||||
class TestBert(unittest.TestCase): | class TestBert(unittest.TestCase): | ||||
def test_bert_1(self): | def test_bert_1(self): | ||||
# model = BertModel.from_pretrained("/home/zyfeng/data/bert-base-chinese") | |||||
model = BertModel(vocab_size=32000, hidden_size=768, | |||||
num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) | |||||
from fastNLP.core.const import Const | |||||
model = BertForSequenceClassification(2) | |||||
input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) | |||||
input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) | |||||
token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]]) | |||||
pred = model(input_ids, token_type_ids, input_mask) | |||||
self.assertTrue(isinstance(pred, dict)) | |||||
self.assertTrue(Const.OUTPUT in pred) | |||||
self.assertEqual(tuple(pred[Const.OUTPUT].shape), (2, 2)) | |||||
def test_bert_2(self): | |||||
from fastNLP.core.const import Const | |||||
model = BertForMultipleChoice(2) | |||||
input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) | |||||
input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) | |||||
token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]]) | |||||
pred = model(input_ids, token_type_ids, input_mask) | |||||
self.assertTrue(isinstance(pred, dict)) | |||||
self.assertTrue(Const.OUTPUT in pred) | |||||
self.assertEqual(tuple(pred[Const.OUTPUT].shape), (1, 2)) | |||||
def test_bert_3(self): | |||||
from fastNLP.core.const import Const | |||||
model = BertForTokenClassification(7) | |||||
input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) | |||||
input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) | |||||
token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]]) | |||||
pred = model(input_ids, token_type_ids, input_mask) | |||||
self.assertTrue(isinstance(pred, dict)) | |||||
self.assertTrue(Const.OUTPUT in pred) | |||||
self.assertEqual(tuple(pred[Const.OUTPUT].shape), (2, 3, 7)) | |||||
def test_bert_4(self): | |||||
from fastNLP.core.const import Const | |||||
model = BertForQuestionAnswering() | |||||
input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) | input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) | ||||
input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) | input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) | ||||
token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]]) | token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]]) | ||||
all_encoder_layers, pooled_output = model(input_ids, token_type_ids, input_mask) | |||||
for layer in all_encoder_layers: | |||||
self.assertEqual(tuple(layer.shape), (2, 3, 768)) | |||||
self.assertEqual(tuple(pooled_output.shape), (2, 768)) | |||||
pred = model(input_ids, token_type_ids, input_mask) | |||||
self.assertTrue(isinstance(pred, dict)) | |||||
self.assertTrue(Const.OUTPUTS(0) in pred) | |||||
self.assertTrue(Const.OUTPUTS(1) in pred) | |||||
self.assertEqual(tuple(pred[Const.OUTPUTS(0)].shape), (2, 3)) | |||||
self.assertEqual(tuple(pred[Const.OUTPUTS(1)].shape), (2, 3)) |
@@ -0,0 +1,21 @@ | |||||
import unittest | |||||
import torch | |||||
from fastNLP.models.bert import BertModel | |||||
class TestBert(unittest.TestCase): | |||||
def test_bert_1(self): | |||||
model = BertModel(vocab_size=32000, hidden_size=768, | |||||
num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) | |||||
input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) | |||||
input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) | |||||
token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]]) | |||||
all_encoder_layers, pooled_output = model(input_ids, token_type_ids, input_mask) | |||||
for layer in all_encoder_layers: | |||||
self.assertEqual(tuple(layer.shape), (2, 3, 768)) | |||||
self.assertEqual(tuple(pooled_output.shape), (2, 768)) |