* fix processor.py

* add code comments * merge *_saver.py & *_loader.py in io/ * (ancient codes) rename Loss into LossFromTorch
6 years ago · 27e9453d19
--- a/fastNLP/api/model_zoo.py
+++ b/fastNLP/api/model_zoo.py
@@ -1,5 +1,3 @@
 import torch

 import hashlib
 import os
 import re
@@ -7,6 +5,8 @@ import shutil
 import sys
 import tempfile

 import torch

 try:
    from requests.utils import urlparse
    from requests import get as urlopen
@@ -132,7 +132,3 @@ if tqdm is None:

            sys.stderr.write('\n')


 if __name__ == '__main__':
    pipeline = load_url('http://10.141.208.102:5000/file/download/infer_context-4e86fd93.pkl', model_dir='.')
    print(type(pipeline))
--- a/fastNLP/api/processor.py
+++ b/fastNLP/api/processor.py
@@ -1,14 +1,15 @@
 import torch
 from collections import defaultdict
 import re
 from collections import defaultdict

 import torch

 from fastNLP.core.dataset import DataSet
 from fastNLP.core.vocabulary import Vocabulary
 from fastNLP.core.batch import Batch
 from fastNLP.core.dataset import DataSet
 from fastNLP.core.sampler import SequentialSampler
 from fastNLP.core.vocabulary import Vocabulary


 class Processor:
 class Processor(object):
    def __init__(self, field_name, new_added_field_name):
        self.field_name = field_name
        if new_added_field_name is None:
@@ -17,7 +18,7 @@ class Processor:
            self.new_added_field_name = new_added_field_name

    def process(self, *args, **kwargs):
        pass
        raise NotImplementedError

    def __call__(self, *args, **kwargs):
        return self.process(*args, **kwargs)
@@ -132,13 +133,14 @@ class Num2TagProcessor(Processor):


 class IndexerProcessor(Processor):
    def __init__(self, vocab, field_name, new_added_field_name, delete_old_field=False):
    def __init__(self, vocab, field_name, new_added_field_name, delete_old_field=False, is_input=True):

        assert isinstance(vocab, Vocabulary), "Only Vocabulary class is allowed, not {}.".format(type(vocab))

        super(IndexerProcessor, self).__init__(field_name, new_added_field_name)
        self.vocab = vocab
        self.delete_old_field = delete_old_field
        self.is_input = is_input

    def set_vocab(self, vocab):
        assert isinstance(vocab, Vocabulary), "Only Vocabulary class is allowed, not {}.".format(type(vocab))
@@ -146,13 +148,14 @@ class IndexerProcessor(Processor):
        self.vocab = vocab

    def process(self, dataset):
        assert isinstance(dataset, DataSet), "Only Dataset class is allowed, not {}.".format(type(dataset))
        assert isinstance(dataset, DataSet), "Only DataSet class is allowed, not {}.".format(type(dataset))
        for ins in dataset:
            tokens = ins[self.field_name]
            index = [self.vocab.to_index(token) for token in tokens]
            ins[self.new_added_field_name] = index

        dataset._set_need_tensor(**{self.new_added_field_name: True})
        if self.is_input:
            dataset.set_input(self.new_added_field_name)

        if self.delete_old_field:
            dataset.delete_field(self.field_name)
@@ -161,6 +164,9 @@ class IndexerProcessor(Processor):


 class VocabProcessor(Processor):
    """Build vocabulary with a field in the data set.

    """
    def __init__(self, field_name):
        super(VocabProcessor, self).__init__(field_name, None)
        self.vocab = Vocabulary()
@@ -178,17 +184,20 @@ class VocabProcessor(Processor):


 class SeqLenProcessor(Processor):
    def __init__(self, field_name, new_added_field_name='seq_lens'):
    def __init__(self, field_name, new_added_field_name='seq_lens', is_input=True):
        super(SeqLenProcessor, self).__init__(field_name, new_added_field_name)
        self.is_input = is_input

    def process(self, dataset):
        assert isinstance(dataset, DataSet), "Only Dataset class is allowed, not {}.".format(type(dataset))
        for ins in dataset:
            length = len(ins[self.field_name])
            ins[self.new_added_field_name] = length
        dataset._set_need_tensor(**{self.new_added_field_name: True})
        if self.is_input:
            dataset.set_input(self.new_added_field_name)
        return dataset


 class ModelProcessor(Processor):
    def __init__(self, model, seq_len_field_name='seq_lens', batch_size=32):
        """
@@ -238,6 +247,7 @@ class ModelProcessor(Processor):
        device = torch.device(device)
        self.model.to(device)


 class Index2WordProcessor(Processor):
    def __init__(self, vocab, field_name, new_added_field_name):
        super(Index2WordProcessor, self).__init__(field_name, new_added_field_name)
@@ -251,6 +261,7 @@ class Index2WordProcessor(Processor):


 class SetTensorProcessor(Processor):
    # TODO: remove it. It is strange.
    def __init__(self, field_dict, default=False):
        super(SetTensorProcessor, self).__init__(None, None)
        self.field_dict = field_dict
@@ -264,6 +275,7 @@ class SetTensorProcessor(Processor):


 class SetIsTargetProcessor(Processor):
    # TODO; remove it.
    def __init__(self, field_dict, default=False):
        super(SetIsTargetProcessor, self).__init__(None, None)
        self.field_dict = field_dict
--- a/fastNLP/core/init.py
+++ b/fastNLP/core/init.py
@@ -2,7 +2,7 @@ from .batch import Batch
 from .dataset import DataSet
 from .fieldarray import FieldArray
 from .instance import Instance
 from .losses import Loss
 from .losses import LossFromTorch
 from .optimizer import Optimizer
 from .sampler import SequentialSampler, BucketSampler, RandomSampler, BaseSampler
 from .tester import Tester
--- a/fastNLP/core/dataset.py
+++ b/fastNLP/core/dataset.py
@@ -9,32 +9,20 @@ from fastNLP.core.utils import get_func_signature
 _READERS = {}


 def construct_dataset(sentences):
    """Wrap every sentence in its own Instance and gather them into a DataSet.

    :param sentences: list of list of str
    :return dataset: a DataSet object holding one instance per sentence, each
        storing the sentence under the 'raw_sentence' field
    """
    collected = DataSet()
    for tokens in sentences:
        wrapped = Instance()
        wrapped['raw_sentence'] = tokens
        collected.append(wrapped)
    return collected


 class DataSet(object):
    """DataSet is the collection of examples.
    DataSet provides instance-level interface. You can append and access an instance of the DataSet.
    However, it stores data in a different way: Field-first, Instance-second.

    """

    def __init__(self, data=None):
        """

        :param data: a dict or a list. If it is a dict, the key is the name of a field and the value is the field.
                All values must be of the same length.
                If it is a list, it must be a list of Instance objects.
        :param data: a dict or a list.
                If `data` is a dict, the key is the name of a FieldArray and the value is the FieldArray. All values
                must be of the same length.
                If `data` is a list, it must be a list of Instance objects.
        """
        self.field_arrays = {}
        if data is not None:
@@ -60,6 +48,7 @@ class DataSet(object):
        def iter_func():
            for idx in range(len(self)):
                yield self[idx]

        return iter_func()

    def _inner_iter(self):
@@ -69,7 +58,8 @@ class DataSet(object):
                self.idx = idx

            def __getitem__(self, item):
                assert item in self.dataset.field_arrays, "no such field:{} in Instance {}".format(item, self.dataset[self.idx])
                assert item in self.dataset.field_arrays, "no such field:{} in Instance {}".format(item, self.dataset[
                    self.idx])
                assert self.idx < len(self.dataset.field_arrays[item]), "index:{} out of range".format(self.idx)
                return self.dataset.field_arrays[item][self.idx]

@@ -79,6 +69,7 @@ class DataSet(object):
        def inner_iter_func():
            for idx in range(len(self)):
                yield Iter_ptr(self, idx)

        return inner_iter_func()

    def __getitem__(self, idx):
@@ -217,9 +208,17 @@ class DataSet(object):
                raise KeyError("{} is not a valid field name.".format(name))

    def get_input_name(self):
        """Collect the names of the fields flagged as input.

        :return list field_names: names of every field array whose `is_input` is truthy,
            in the iteration order of `self.field_arrays`
        """
        input_names = []
        for field_name, field_array in self.field_arrays.items():
            if field_array.is_input:
                input_names.append(field_name)
        return input_names

    def get_target_name(self):
        """Collect the names of the fields flagged as target.

        :return list field_names: names of every field array whose `is_target` is truthy,
            in the iteration order of `self.field_arrays`
        """
        target_names = []
        for field_name, field_array in self.field_arrays.items():
            if field_array.is_target:
                target_names.append(field_name)
        return target_names

    @classmethod
@@ -243,7 +242,7 @@ class DataSet(object):
        :return results: if new_field_name is not passed, returned values of the function over all instances.
        """
        results = [func(ins) for ins in self._inner_iter()]
        if len(list(filter(lambda x: x is not None, results)))==0: # all None
        if len(list(filter(lambda x: x is not None, results))) == 0:  # all None
            raise ValueError("{} always return None.".format(get_func_signature(func=func)))

        extra_param = {}
@@ -269,6 +268,12 @@ class DataSet(object):
            return results

    def drop(self, func):
        """Drop instances if a condition holds.

        :param func: a function that takes an Instance object as input, and returns bool.
            The instance will be dropped if the function returns True.

        """
        results = [ins for ins in self._inner_iter() if not func(ins)]
        for name, old_field in self.field_arrays.items():
            self.field_arrays[name].content = [ins[name] for ins in results]
@@ -338,10 +343,33 @@ class DataSet(object):
        return cls(_dict)

    def save(self, path):
        """Serialize this DataSet object into a pickle file.

        :param str path: the file the pickle is written to (opened in binary write mode)
        """
        with open(path, 'wb') as out_file:
            # Equivalent to pickle.dump(self, out_file) with the default protocol.
            pickle.Pickler(out_file).dump(self)

    @staticmethod
    def load(path):
        """Load a DataSet object from pickle.

        :param str path: the path to the pickle
        :return DataSet data_set:
        """
        with open(path, 'rb') as f:
            return pickle.load(f)


 def construct_dataset(sentences):
    """Wrap every sentence in its own Instance and gather them into a DataSet.

    :param sentences: list of list of str
    :return dataset: a DataSet object holding one instance per sentence, each
        storing the sentence under the 'raw_sentence' field
    """
    collected = DataSet()
    for tokens in sentences:
        wrapped = Instance()
        wrapped['raw_sentence'] = tokens
        collected.append(wrapped)
    return collected
--- a/fastNLP/core/losses.py
+++ b/fastNLP/core/losses.py
@@ -7,14 +7,13 @@ import torch.nn.functional as F
 from fastNLP.core.utils import CheckError
 from fastNLP.core.utils import CheckRes
 from fastNLP.core.utils import _build_args
 from fastNLP.core.utils import _check_function_or_method
 from fastNLP.core.utils import _check_arg_dict_list
 from fastNLP.core.utils import _check_function_or_method
 from fastNLP.core.utils import get_func_signature


 class LossBase(object):
    def __init__(self):
        # key: name in target function; value: name in output function
        self.param_map = {}
        self._checked = False

@@ -159,8 +158,18 @@ class LossBase(object):

        return loss


 class LossFunc(LossBase):
    """A wrapper of user-provided loss function.

    """
    def __init__(self, func, key_map=None, **kwargs):
        """

        :param func: a callable object, such as a function.
        :param dict key_map:
        :param kwargs:
        """
        super(LossFunc, self).__init__()
        _check_function_or_method(func)
        if key_map is not None:
@@ -254,19 +263,19 @@ def _prepare_losser(losser):


 def squash(predict, truth, **kwargs):
    '''To reshape tensors in order to fit Loss functions in pytorch
    """To reshape tensors in order to fit loss functions in pytorch

    :param predict	: Tensor, model output
    :param truth	: Tensor, truth from dataset
    :param **kwargs : extra arguments

    :return predict , truth: predict & truth after processing
    '''
    """
    return predict.view(-1, predict.size()[-1]), truth.view(-1, )


 def unpad(predict, truth, **kwargs):
    '''To process padded sequence output to get true loss
    """To process padded sequence output to get true loss
    Using pack_padded_sequence() method
    This method contains squash()

@@ -277,7 +286,7 @@ def unpad(predict, truth, **kwargs):
                      the i-th element is true lengths of i-th sequence

    :return predict , truth: predict & truth after processing
    '''
    """
    if kwargs.get("lens") is None:
        return predict, truth
    lens = torch.LongTensor(kwargs["lens"])
@@ -288,7 +297,7 @@ def unpad(predict, truth, **kwargs):


 def unpad_mask(predict, truth, **kwargs):
    '''To process padded sequence output to get true loss
    """To process padded sequence output to get true loss
    Using mask() method
    This method contains squash()

@@ -299,7 +308,7 @@ def unpad_mask(predict, truth, **kwargs):
                      the i-th element is true lengths of i-th sequence

    :return predict , truth: predict & truth after processing
    '''
    """
    if kwargs.get("lens") is None:
        return predict, truth
    mas = make_mask(kwargs["lens"], truth.size()[1])
@@ -307,7 +316,7 @@ def unpad_mask(predict, truth, **kwargs):


 def mask(predict, truth, **kwargs):
    '''To select specific elements from Tensor
    """To select specific elements from Tensor
    This method contains squash()

    :param predict	: Tensor, [batch_size , max_len , tag_size]
@@ -317,7 +326,7 @@ def mask(predict, truth, **kwargs):
                      the mask Tensor , the position that is 1 will be selected

    :return predict , truth: predict & truth after processing
    '''
    """
    if kwargs.get("mask") is None:
        return predict, truth
    mask = kwargs["mask"]
@@ -332,14 +341,14 @@ def mask(predict, truth, **kwargs):


 def make_mask(lens, tar_len):
    '''to generate a mask that select [:lens[i]] for i-th element
    """to generate a mask that select [:lens[i]] for i-th element
    embezzle from fastNLP.models.sequence_modeling.seq_mask

    :param lens		: list or LongTensor, [batch_size]
    :param tar_len	: int

    :return mask 	: ByteTensor
    '''
    """
    lens = torch.LongTensor(lens)
    mask = [torch.ge(lens, i + 1) for i in range(tar_len)]
    mask = torch.stack(mask, 1)
@@ -376,9 +385,11 @@ loss_function_name = {
 }


 class Loss(object):
    """a Loss object is a callable object represents loss functions
 class LossFromTorch(object):
    """a LossFromTorch object is a callable object represents loss functions

        This class only helps you with loss functions from PyTorch.
        It has nothing to do with Trainer.
    """

    def __init__(self, loss_name, pre_pro=[squash], **kwargs):
@@ -408,11 +419,11 @@ class Loss(object):
        self.pre_pro = [f if callable(f) else method_dict.get(f) for f in pre_pro]

    def add_pre_pro(self, func):
        '''add a pre_pro function
        """add a pre_pro function

        :param func: a function or str, methods to reform parameters before calculating loss
            the strings will be auto translated to pre-defined functions
        '''
        """
        if not callable(func):
            func = method_dict.get(func)
            if func is None:
@@ -421,12 +432,12 @@ class Loss(object):

    @staticmethod
    def _get_loss(loss_name, **kwargs):
        '''Get loss function from torch
        """Get loss function from torch

        :param loss_name: str, the name of loss function
        :param **kwargs: kwargs for torch loss function
        :return: A callable loss function object
        '''
        """
        loss_name = loss_name.strip().lower()
        loss_name = "".join(loss_name.split("_"))

@@ -435,19 +446,19 @@ class Loss(object):
        return loss_function_name[loss_name](**kwargs)

    def get(self):
        '''This method exists just for make some existing codes run error-freely
        '''
        """This method exists just for make some existing codes run error-freely
        """
        return self

    def __call__(self, predict, truth, **kwargs):
        '''call a loss function
        """Call a loss function
        predict and truth will be processed by pre_pro methods in order of addition

        :param predict	: Tensor, model output
        :param truth 	: Tensor, truth from dataset
        :param **kwargs : extra arguments, pass to pre_pro functions
            for example, if used unpad_mask() in pre_pro, there should be a kwarg named lens
        '''
        """
        for f in self.pre_pro:
            if f is None:
                continue
--- a/fastNLP/core/metrics.py
+++ b/fastNLP/core/metrics.py
@@ -308,6 +308,13 @@ def _prepare_metrics(metrics):
    return _metrics


 """
   Attention: the code below is not used in the current fastNLP.
   However, it is still useful.

 """


 def _conver_numpy(x):
    """convert input data to numpy array

--- a/fastNLP/core/optimizer.py
+++ b/fastNLP/core/optimizer.py
@@ -11,6 +11,12 @@ class Optimizer(object):

 class SGD(Optimizer):
    def __init__(self, model_params=None, lr=0.01, momentum=0):
        """Forward the SGD settings to the base Optimizer constructor.

        :param model_params: a generator. E.g. model.parameters() for PyTorch models.
        :param float lr: learning rate. Default: 0.01
        :param float momentum: momentum. Default: 0
        """
        hyper_params = {"lr": lr, "momentum": momentum}
        super(SGD, self).__init__(model_params, **hyper_params)

    def construct_from_pytorch(self, model_params):
@@ -23,6 +29,12 @@ class SGD(Optimizer):

 class Adam(Optimizer):
    def __init__(self, model_params=None, lr=0.01, weight_decay=0):
        """Forward the Adam settings to the base Optimizer constructor.

        :param model_params: a generator. E.g. model.parameters() for PyTorch models.
        :param float lr: learning rate. Default: 0.01
        :param float weight_decay: weight decay setting passed through unchanged. Default: 0
        """
        hyper_params = {"lr": lr, "weight_decay": weight_decay}
        super(Adam, self).__init__(model_params, **hyper_params)

    def construct_from_pytorch(self, model_params):
--- a/fastNLP/core/trainer.py
+++ b/fastNLP/core/trainer.py
@@ -140,7 +140,6 @@ class Trainer(object):
    def train(self):
        """Start Training.

        :return:
        """
        try:
            if torch.cuda.is_available() and self.use_cuda:
@@ -216,14 +215,6 @@ class Trainer(object):
            pbar.close()

    def _print_train(self):
        """

        :param data_iterator:
        :param model:
        :param epoch:
        :param start:
        :return:
        """
        epoch = 1
        start = time.time()
        while epoch <= self.n_epochs:
--- a/fastNLP/io/base_loader.py
+++ b/fastNLP/io/base_loader.py
@@ -29,19 +29,3 @@ class BaseLoader(object):
            with open(cache_path, 'wb') as f:
                pickle.dump(obj, f)
            return obj


 class ToyLoader0(BaseLoader):
    """
        Toy corpus loader for CharLM.
    """

    def __init__(self, data_path):
        # presumably BaseLoader stores data_path on the instance; load() reads self.data_path
        super(ToyLoader0, self).__init__(data_path)

    def load(self):
        """Read the corpus file and return its lower-cased, whitespace-split tokens.

        Every "<unk>" marker is rewritten to the plain word "unk" before splitting.
        """
        import re
        with open(self.data_path, 'r') as corpus_file:
            lowered = corpus_file.read().lower()
        return re.sub(r"<unk>", "unk", lowered).split()
--- a/fastNLP/io/config_saver.py
+++ b/fastNLP/io/config_saver.py
@@ -1,6 +1,152 @@
 import configparser
 import json
 import os

 from fastNLP.io.config_loader import ConfigSection, ConfigLoader
 from fastNLP.io.base_loader import BaseLoader


 class ConfigLoader(BaseLoader):
    """Loader for configuration files."""

    def __init__(self, data_path=None):
        """Optionally load and parse `data_path` right away.

        :param data_path: when not None, the result of the base loader's `load` is
            handed to `parse` and kept in `self.config`
        """
        super(ConfigLoader, self).__init__()
        if data_path is None:
            return
        raw = super(ConfigLoader, self).load(data_path)
        self.config = self.parse(raw)

    @staticmethod
    def parse(string):
        """Placeholder; always raises NotImplementedError in this class."""
        raise NotImplementedError

    @staticmethod
    def load_config(file_path, sections):
        """Fill section objects with the values found in a config file.

        :param file_path: the path of config file
        :param sections: the dict of {section_name(string): Section instance}
        Example:
            test_args = ConfigSection()
            ConfigLoader().load_config("./data_for_tests/config", {"POS_test": test_args})
        :return: return nothing, but the values are saved as attributes of the section objects
        :raises FileNotFoundError: if `file_path` does not exist
        """
        assert isinstance(sections, dict)
        if not os.path.exists(file_path):
            raise FileNotFoundError("config file {} not found. ".format(file_path))
        parser = configparser.ConfigParser()
        parser.read(file_path)
        for sec_name in sections:
            sec_obj = sections[sec_name]
            if sec_name not in parser:
                print('section %s not found in config file' % (sec_name))
                continue
            # Non-callable, non-dunder attributes already present on the section object.
            known_attrs = [name for name in sec_obj.__dict__.keys()
                           if not (callable(getattr(sec_obj, name)) or name.startswith("__"))]
            file_section = parser[sec_name]
            for attr in file_section.keys():
                try:
                    # Values are stored as JSON literals in the config file.
                    val = json.loads(file_section[attr])
                    if attr in known_attrs:
                        # A known attribute keeps its type; unknown ones are simply added.
                        current = getattr(sec_obj, attr)
                        assert type(val) == type(current), \
                            'type not match, except %s but got %s' % \
                            (type(current), type(val))
                    setattr(sec_obj, attr, val)
                except Exception:
                    # Any failure (bad JSON, type mismatch, ...) only skips this attribute.
                    print("cannot load attribute %s in section %s"
                          % (attr, sec_name))


 class ConfigSection(object):
    """Attribute bag holding the options of one configuration section.

    Options are stored as plain instance attributes and can also be read and
    written with item syntax (``section[key]``).
    """

    def __init__(self):
        pass

    def __getitem__(self, key):
        """Return the value stored under `key`.

        :param key: str, the name of the attribute
        :return attr: the value of this attribute
        :raises AttributeError: if no attribute named `key` has been set
        """
        if key in self.__dict__:
            return getattr(self, key)
        raise AttributeError("do NOT have attribute %s" % key)

    def __setitem__(self, key, value):
        """Add a new attribute, or update an existing one with a same-typed value.

        :param key: str, the name of the attribute
        :param value: the value of this attribute
        :raises AttributeError: if `key` already exists and `value` is not an
            instance of the type of the current value
        """
        if key in self.__dict__:
            current = getattr(self, key)
            if not isinstance(value, type(current)):
                raise AttributeError("attr %s except %s but got %s" %
                                     (key, str(type(current)), str(type(value))))
        setattr(self, key, value)

    def __contains__(self, item):
        """
        :param item: The key of item.
        :return: True if an attribute named `item` has been set, else False.
        """
        return item in self.__dict__

    def __eq__(self, other):
        """Overwrite the == operator

        Two sections are equal when they hold exactly the same keys and every key
        maps to an equal value in both of them.

        :param other: Another ConfigSection() object which to be compared.
        :return: True/False for ConfigSection operands, NotImplemented otherwise.
        """
        if not isinstance(other, ConfigSection):
            return NotImplemented
        # Compare against the values of `other`, not self with itself.
        return self.__dict__ == other.__dict__

    def __ne__(self, other):
        """Overwrite the != operator

        :param other: Another ConfigSection() object which to be compared.
        :return: the negation of `==`, or NotImplemented for foreign operands.
        """
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            return outcome
        return not outcome

    @property
    def data(self):
        """The underlying attribute dict (the live `__dict__`, not a copy)."""
        return self.__dict__


 if __name__ == "__main__":
    # Manual smoke test: fill three sections from the sample config and print what was loaded.
    # NOTE(review): with a non-None data_path, ConfigLoader.__init__ calls parse(), which
    # raises NotImplementedError in this class, so this line looks unable to complete - confirm.
    config = ConfigLoader('there is no data')

    section = {'General': ConfigSection(), 'My': ConfigSection(), 'A': ConfigSection()}
    """
            General and My can be found in config file, so the attr and
        value will be updated
            A cannot be found in config file, so nothing will be done
    """

    config.load_config("../../test/data_for_tests/config", section)
    # Dump every attribute of every section together with the type it was loaded as.
    for s in section:
        print(s)
        for attr in section[s].__dict__.keys():
            print(s, attr, getattr(section[s], attr), type(getattr(section[s], attr)))


 class ConfigSaver(object):
@@ -125,7 +271,7 @@ class ConfigSaver(object):
                    # logger = create_logger(__name__, "./config_loader.log")
                    # logger.warning("section [%s] in config file [%s] has been changed" % (
                    #    section_name, self.file_path
                    #))
                    # ))
                    change_file = True
                    break
            if not change_file:
--- a/fastNLP/io/config_loader.py
+++ b/fastNLP/io/config_loader.py
@@ -1,149 +0,0 @@
 import configparser
 import json
 import os

 from fastNLP.io.base_loader import BaseLoader


 class ConfigLoader(BaseLoader):
    """Loader for configuration files."""

    def __init__(self, data_path=None):
        """Optionally load and parse `data_path` right away.

        :param data_path: when not None, the result of the base loader's `load` is
            handed to `parse` and kept in `self.config`
        """
        super(ConfigLoader, self).__init__()
        if data_path is None:
            return
        raw = super(ConfigLoader, self).load(data_path)
        self.config = self.parse(raw)

    @staticmethod
    def parse(string):
        """Placeholder; always raises NotImplementedError in this class."""
        raise NotImplementedError

    @staticmethod
    def load_config(file_path, sections):
        """Fill section objects with the values found in a config file.

        :param file_path: the path of config file
        :param sections: the dict of {section_name(string): Section instance}
        Example:
            test_args = ConfigSection()
            ConfigLoader().load_config("./data_for_tests/config", {"POS_test": test_args})
        :return: return nothing, but the values are saved as attributes of the section objects
        :raises FileNotFoundError: if `file_path` does not exist
        """
        assert isinstance(sections, dict)
        if not os.path.exists(file_path):
            raise FileNotFoundError("config file {} not found. ".format(file_path))
        parser = configparser.ConfigParser()
        parser.read(file_path)
        for sec_name in sections:
            sec_obj = sections[sec_name]
            if sec_name not in parser:
                print('section %s not found in config file' % (sec_name))
                continue
            # Non-callable, non-dunder attributes already present on the section object.
            known_attrs = [name for name in sec_obj.__dict__.keys()
                           if not (callable(getattr(sec_obj, name)) or name.startswith("__"))]
            file_section = parser[sec_name]
            for attr in file_section.keys():
                try:
                    # Values are stored as JSON literals in the config file.
                    val = json.loads(file_section[attr])
                    if attr in known_attrs:
                        # A known attribute keeps its type; unknown ones are simply added.
                        current = getattr(sec_obj, attr)
                        assert type(val) == type(current), \
                            'type not match, except %s but got %s' % \
                            (type(current), type(val))
                    setattr(sec_obj, attr, val)
                except Exception:
                    # Any failure (bad JSON, type mismatch, ...) only skips this attribute.
                    print("cannot load attribute %s in section %s"
                          % (attr, sec_name))


 class ConfigSection(object):
    """Attribute bag holding the options of one configuration section.

    Options are stored as plain instance attributes and can also be read and
    written with item syntax (``section[key]``).
    """

    def __init__(self):
        pass

    def __getitem__(self, key):
        """Return the value stored under `key`.

        :param key: str, the name of the attribute
        :return attr: the value of this attribute
        :raises AttributeError: if no attribute named `key` has been set
        """
        if key in self.__dict__:
            return getattr(self, key)
        raise AttributeError("do NOT have attribute %s" % key)

    def __setitem__(self, key, value):
        """Add a new attribute, or update an existing one with a same-typed value.

        :param key: str, the name of the attribute
        :param value: the value of this attribute
        :raises AttributeError: if `key` already exists and `value` is not an
            instance of the type of the current value
        """
        if key in self.__dict__:
            current = getattr(self, key)
            if not isinstance(value, type(current)):
                raise AttributeError("attr %s except %s but got %s" %
                                     (key, str(type(current)), str(type(value))))
        setattr(self, key, value)

    def __contains__(self, item):
        """
        :param item: The key of item.
        :return: True if an attribute named `item` has been set, else False.
        """
        return item in self.__dict__

    def __eq__(self, other):
        """Overwrite the == operator

        Two sections are equal when they hold exactly the same keys and every key
        maps to an equal value in both of them.

        :param other: Another ConfigSection() object which to be compared.
        :return: True/False for ConfigSection operands, NotImplemented otherwise.
        """
        if not isinstance(other, ConfigSection):
            return NotImplemented
        # Compare against the values of `other`, not self with itself.
        return self.__dict__ == other.__dict__

    def __ne__(self, other):
        """Overwrite the != operator

        :param other: Another ConfigSection() object which to be compared.
        :return: the negation of `==`, or NotImplemented for foreign operands.
        """
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            return outcome
        return not outcome

    @property
    def data(self):
        """The underlying attribute dict (the live `__dict__`, not a copy)."""
        return self.__dict__


 if __name__ == "__main__":
    # Manual smoke test: fill three sections from the sample config and print what was loaded.
    # NOTE(review): with a non-None data_path, ConfigLoader.__init__ calls parse(), which
    # raises NotImplementedError in this class, so this line looks unable to complete - confirm.
    config = ConfigLoader('there is no data')

    section = {'General': ConfigSection(), 'My': ConfigSection(), 'A': ConfigSection()}
    """
            General and My can be found in config file, so the attr and
        value will be updated
            A cannot be found in config file, so nothing will be done
    """

    config.load_config("../../test/data_for_tests/config", section)
    # Dump every attribute of every section together with the type it was loaded as.
    for s in section:
        print(s)
        for attr in section[s].__dict__.keys():
            print(s, attr, getattr(section[s], attr), type(getattr(section[s], attr)))
--- a/fastNLP/io/dataset_loader.py
+++ b/fastNLP/io/dataset_loader.py
@@ -1,4 +1,3 @@
 #TODO: need fix for current DataSet
 import os

 from fastNLP.core.dataset import DataSet
@@ -20,8 +19,7 @@ def convert_seq_dataset(data):
    """
    dataset = DataSet()
    for word_seq in data:
        x = TextField(word_seq, is_target=False)
        dataset.append(Instance(word_seq=x))
        dataset.append(Instance(word_seq=word_seq))
    return dataset


@@ -40,11 +38,7 @@ def convert_seq2tag_dataset(data):
    """
    dataset = DataSet()
    for sample in data:
        word_seq, label = sample[0], sample[1]
        ins = Instance()
        ins.add_field("word_seq", TextField(word_seq, is_target=False)) \
            .add_field("label", LabelField(label, is_target=True))
        dataset.append(ins)
        dataset.append(Instance(word_seq=sample[0], label=sample[1]))
    return dataset


@@ -63,11 +57,7 @@ def convert_seq2seq_dataset(data):
    """
    dataset = DataSet()
    for sample in data:
        word_seq, label_seq = sample[0], sample[1]
        ins = Instance()
        ins.add_field("word_seq", TextField(word_seq, is_target=False)) \
            .add_field("label_seq", TextField(label_seq, is_target=True))
        dataset.append(ins)
        dataset.append(Instance(word_seq=sample[0], label_seq=sample[1]))
    return dataset


@@ -273,85 +263,6 @@ class ClassDataSetLoader(DataSetLoader):
        return convert_seq2tag_dataset(data)


@DataSet.set_reader('read_conll')
class ConllLoader(DataSetLoader):
    """Loader for CoNLL-format files.

    A file is read as a sequence of sentences separated by blank lines; every
    non-blank, non-comment line is split on whitespace into its columns.
    """

    def __init__(self):
        super(ConllLoader, self).__init__()

    def load(self, data_path):
        """Read the file at ``data_path``, parse it and hand the result to ``convert``.

        :param str data_path: the path to the conll data set
        :return: whatever ``convert`` returns for the parsed sentences
            (``convert`` is a stub in this class, so that is ``None``)
        """
        with open(data_path, "r", encoding="utf-8") as f:
            lines = f.readlines()
        data = self.parse(lines)
        return self.convert(data)

    @staticmethod
    def parse(lines):
        """Group the lines of a conll file into sentences.

        :param list lines: a list containing all lines in a conll file.
        :return: a 3D list: sentences -> token lines -> whitespace-separated columns
        """
        sentences = []
        tokens = []
        for line in lines:
            # startswith() is safe on an empty string, unlike line[0]
            if line.startswith("#"):
                # skip the comments
                continue
            if not line.strip():
                # a blank (or whitespace-only) line closes the current sentence
                sentences.append(tokens)
                tokens = []
                continue
            tokens.append(line.split())
        if tokens:
            # the file did not end with a blank line: keep the last sentence
            sentences.append(tokens)
        return sentences

    def convert(self, data):
        """Stub: does nothing and returns ``None``."""
        pass


@DataSet.set_reader('read_lm')
class LMDataSetLoader(DataSetLoader):
    """Language Model Dataset Loader

        This loader produces data for language model training in a supervised way.
        That means it has X and Y.

    """

    def __init__(self):
        super(LMDataSetLoader, self).__init__()

    def load(self, data_path):
        """Read a text file, tokenize it on whitespace and cut it into (x, y) pairs.

        :param str data_path: path to the text file
        :return: whatever ``convert`` returns for the cut data
            (``convert`` is a stub in this class, so that is ``None``)
        :raises FileNotFoundError: if ``data_path`` does not exist
        """
        if not os.path.exists(data_path):
            raise FileNotFoundError("file {} not found.".format(data_path))
        # canonical codec name; the previous spelling "utf=8" was a typo
        with open(data_path, "r", encoding="utf-8") as f:
            text = " ".join(f.readlines())
        tokens = text.strip().split()
        data = self.sentence_cut(tokens)
        return self.convert(data)

    def sentence_cut(self, tokens, sentence_length=15):
        """Cut ``tokens`` into consecutive windows of ``sentence_length`` tokens.

        For every full window ``x``, the target ``y`` is the same window
        shifted one token to the right. Trailing tokens that do not fill a
        whole window are dropped.

        :param list tokens: the token sequence
        :param int sentence_length: number of tokens per window
        :return: list of ``[x, y]`` pairs, ``x`` and ``y`` having equal length
        """
        data_set = []
        for idx in range(len(tokens) // sentence_length):
            # each window starts where the previous one ended
            start = idx * sentence_length
            end = start + sentence_length
            x = tokens[start:end]
            y = tokens[start + 1:end + 1]
            if len(y) < len(x):
                # ad hoc: the very last window has no successor token
                y.append("<unk>")
            data_set.append([x, y])
        return data_set

    def convert(self, data):
        """Stub: does nothing and returns ``None``."""
        pass


@DataSet.set_reader('read_people_daily')
 class PeopleDailyCorpusLoader(DataSetLoader):
    """
@@ -403,10 +314,19 @@ class PeopleDailyCorpusLoader(DataSetLoader):
            pos_tag_examples.append([sent_words, sent_pos_tag])
            ner_examples.append([sent_words, sent_ner])
        # List[List[List[str], List[str]]]
        return pos_tag_examples, ner_examples
        # ner_examples not used
        return self.convert(pos_tag_examples)

    def convert(self, data):
        """Build a DataSet from POS-tagging examples.

        :param data: iterable of ``[sent_words, sent_pos_tag]`` pairs
        :return: a DataSet with fields ``words``, ``tags`` and ``seq_len``;
            ``words`` and ``seq_len`` are marked as input, ``tags`` as target
        """
        data_set = DataSet()
        for item in data:
            sent_words, sent_pos_tag = item[0], item[1]
            data_set.append(Instance(words=sent_words, tags=sent_pos_tag))
        # sequence length is the number of words, not the size of the Instance
        data_set.apply(lambda ins: len(ins["words"]), new_field_name="seq_len")
        data_set.set_target("tags")
        # the field created above is named "words"; "sent_words" does not exist
        data_set.set_input("words")
        data_set.set_input("seq_len")
        return data_set


 class SNLIDataSetLoader(DataSetLoader):
@@ -462,17 +382,13 @@ class SNLIDataSetLoader(DataSetLoader):
        for example in data:
            p, h, l = example
            # list, list, str
            x1 = TextField(p, is_target=False)
            x2 = TextField(h, is_target=False)
            x1_len = TextField([1] * len(p), is_target=False)
            x2_len = TextField([1] * len(h), is_target=False)
            y = LabelField(l, is_target=True)
            instance = Instance()
            instance.add_field("premise", x1)
            instance.add_field("hypothesis", x2)
            instance.add_field("premise_len", x1_len)
            instance.add_field("hypothesis_len", x2_len)
            instance.add_field("truth", y)
            instance.add_field("premise", p)
            instance.add_field("hypothesis", h)
            instance.add_field("truth", l)
            data_set.append(instance)

        data_set.apply(lambda ins: len(ins["premise"]), new_field_name="premise_len")
        data_set.apply(lambda ins: len(ins["hypothesis"]), new_field_name="hypothesis_len")
        data_set.set_input("premise", "hypothesis", "premise_len", "hypothesis_len")
        data_set.set_target("truth")
        return data_set
--- a/fastNLP/io/model_saver.py
+++ b/fastNLP/io/model_saver.py
@@ -1,5 +1,32 @@
 import torch

 from fastNLP.io.base_loader import BaseLoader


 class ModelLoader(BaseLoader):
    """Helpers for restoring PyTorch models from files."""

    def __init__(self):
        super().__init__()

    @staticmethod
    def load_pytorch(empty_model, model_path):
        """Fill ``empty_model`` with the parameters stored at ``model_path``.

        :param empty_model: a PyTorch model with initialized parameters.
        :param model_path: str, the path to the saved model.
        """
        state_dict = torch.load(model_path)
        empty_model.load_state_dict(state_dict)

    @staticmethod
    def load_pytorch_model(model_path):
        """Return the object that ``torch.load`` reads from ``model_path``.

        :param model_path: str, the path to the saved model.
        """
        restored = torch.load(model_path)
        return restored


 class ModelSaver(object):
    """Save a model
@@ -8,6 +35,7 @@ class ModelSaver(object):
            saver.save_pytorch(model)

    """

    def __init__(self, save_path):
        """

--- a/fastNLP/io/model_loader.py
+++ b/fastNLP/io/model_loader.py
@@ -1,28 +0,0 @@
 import torch

 from fastNLP.io.base_loader import BaseLoader


 class ModelLoader(BaseLoader):
    """Helpers for restoring PyTorch models from files."""

    def __init__(self):
        super().__init__()

    @staticmethod
    def load_pytorch(empty_model, model_path):
        """Fill ``empty_model`` with the parameters stored at ``model_path``.

        :param empty_model: a PyTorch model with initialized parameters.
        :param model_path: str, the path to the saved model.
        """
        state_dict = torch.load(model_path)
        empty_model.load_state_dict(state_dict)

    @staticmethod
    def load_pytorch_model(model_path):
        """Return the object that ``torch.load`` reads from ``model_path``.

        :param model_path: str, the path to the saved model.
        """
        restored = torch.load(model_path)
        return restored
--- a/reproduction/Biaffine_parser/infer.py
+++ b/reproduction/Biaffine_parser/infer.py
@@ -5,7 +5,7 @@ sys.path.extend(['/home/yfshao/workdir/dev_fastnlp'])

 from fastNLP.api.processor import *
 from fastNLP.models.biaffine_parser import BiaffineParser
 from fastNLP.io.config_loader import ConfigSection, ConfigLoader
 from fastNLP.io.config_io import ConfigSection, ConfigLoader

 import _pickle as pickle
 import torch
--- a/reproduction/Biaffine_parser/run.py
+++ b/reproduction/Biaffine_parser/run.py
@@ -13,11 +13,10 @@ from fastNLP.core.vocabulary import Vocabulary
 from fastNLP.core.dataset import DataSet
 from fastNLP.core.field import TextField, SeqLabelField
 from fastNLP.core.tester import Tester
 from fastNLP.io.config_loader import ConfigLoader, ConfigSection
 from fastNLP.io.model_loader import ModelLoader
 from fastNLP.io.config_io import ConfigLoader, ConfigSection
 from fastNLP.io.model_io import ModelLoader, ModelSaver
 from fastNLP.io.embed_loader import EmbedLoader
 from fastNLP.models.biaffine_parser import BiaffineParser
 from fastNLP.io.model_saver import ModelSaver

 BOS = '<BOS>'
 EOS = '<EOS>'
--- a/reproduction/LSTM+self_attention_sentiment_analysis/main.py
+++ b/reproduction/LSTM+self_attention_sentiment_analysis/main.py
@@ -2,8 +2,8 @@ import torch.nn.functional as F

 from fastNLP.core.trainer import ClassificationTrainer
 from fastNLP.core.utils import ClassPreprocess as Preprocess
 from fastNLP.io.config_loader import ConfigLoader
 from fastNLP.io.config_loader import ConfigSection
 from fastNLP.io.config_io import ConfigLoader
 from fastNLP.io.config_io import ConfigSection
 from fastNLP.io.dataset_loader import ClassDataSetLoader as Dataset_loader
 from fastNLP.models.base_model import BaseModel
 from fastNLP.modules.aggregator.self_attention import SelfAttention
--- a/reproduction/chinese_word_segment/run.py
+++ b/reproduction/chinese_word_segment/run.py
@@ -3,12 +3,11 @@ import sys

 sys.path.append(os.path.join(os.path.dirname(__file__), '../..'))

 from fastNLP.io.config_loader import ConfigLoader, ConfigSection
 from fastNLP.io.config_io import ConfigLoader, ConfigSection
 from fastNLP.core.trainer import SeqLabelTrainer
 from fastNLP.io.dataset_loader import BaseLoader, TokenizeDataSetLoader
 from fastNLP.core.utils import load_pickle
 from fastNLP.io.model_saver import ModelSaver
 from fastNLP.io.model_loader import ModelLoader
 from fastNLP.io.model_io import ModelLoader, ModelSaver
 from fastNLP.core.tester import SeqLabelTester
 from fastNLP.models.sequence_modeling import AdvSeqLabel
 from fastNLP.core.predictor import SeqLabelInfer
--- a/setup.py
+++ b/setup.py
@@ -12,12 +12,12 @@ with open('requirements.txt', encoding='utf-8') as f:
    reqs = f.read()

 setup(
    name='fastNLP',
    name='FastNLP',
    version='0.1.1',
    description='fastNLP: Deep Learning Toolkit for NLP, developed by Fudan FastNLP Team',
    long_description=readme,
    license=license,
    author='fudanNLP',
    author='FudanNLP',
    python_requires='>=3.5',
    packages=find_packages(),
    install_requires=reqs.strip().split('\n'),
--- a/test/api/test_processor.py
+++ b/test/api/test_processor.py
@@ -0,0 +1,12 @@
 import unittest

 from fastNLP.api.processor import FullSpaceToHalfSpaceProcessor
 from fastNLP.core.dataset import DataSet


 class TestProcessor(unittest.TestCase):
    """Unit tests for processors in ``fastNLP.api.processor``."""

    def test_FullSpaceToHalfSpaceProcessor(self):
        """Full-width characters in the "word" field become half-width ones."""
        ds = DataSet({"word": ["０0, u１, u), (u２, u2"]})
        proc = FullSpaceToHalfSpaceProcessor("word")
        ds = proc(ds)
        # assertTrue(a, b) only checks that `a` is truthy and uses `b` as the
        # failure message, so it never compared the values; assertEqual does.
        self.assertEqual(ds.field_arrays["word"].content, ["00, u1, u), (u2, u2"])
--- a/test/core/test_loss.py
+++ b/test/core/test_loss.py
@@ -45,7 +45,7 @@ class TestLoss(unittest.TestCase):
        # verify the correctness of squash()

        log = math.log
        loss_func = loss.Loss("nll")
        loss_func = loss.LossFromTorch("nll")

        y = tc.Tensor(
            [
@@ -129,7 +129,7 @@ class TestLoss(unittest.TestCase):
        lens = [4, 2, 1]
        y = tc.log(y)

        loss_func = loss.Loss("nll", pre_pro=["unpad"])
        loss_func = loss.LossFromTorch("nll", pre_pro=["unpad"])
        los = loss_func(y, gy, lens=lens)

        r = -log(.1) - log(.3) - log(.5) - log(.5) - log(.3) - log(.7) - log(.1)
@@ -169,7 +169,7 @@ class TestLoss(unittest.TestCase):

        lens = [2, 4, 2]

        loss_func = loss.Loss("nll", pre_pro=["mask"])
        loss_func = loss.LossFromTorch("nll", pre_pro=["mask"])
        los = loss_func(y, gy, mask=mask)

        los2 = loss_func(y, gy, mask=loss.make_mask(lens, gy.size()[-1]))
@@ -205,7 +205,7 @@ class TestLoss(unittest.TestCase):

        y = tc.log(y)

        loss_func = loss.Loss("nll", pre_pro=["unpad_mask"])
        loss_func = loss.LossFromTorch("nll", pre_pro=["unpad_mask"])
        los = loss_func(y, gy, lens=lens)

        r = -log(.1) - log(.3) - log(.5) - log(.5) - log(.3) - log(.7) - log(.1)
@@ -235,7 +235,7 @@ class TestLoss(unittest.TestCase):
        lens = [4, 2, 1]
        y = tc.log(y)

        loss_func = loss.Loss("nll", pre_pro=[], weight=tc.Tensor([1, 1, 0]))
        loss_func = loss.LossFromTorch("nll", pre_pro=[], weight=tc.Tensor([1, 1, 0]))
        loss_func.add_pre_pro("unpad_mask")
        los = loss_func(y, gy, lens=lens)

--- a/test/io/test_config_saver.py
+++ b/test/io/test_config_saver.py
@@ -1,8 +1,7 @@
 import os
 import unittest

 from fastNLP.io.config_loader import ConfigSection, ConfigLoader
 from fastNLP.io.config_saver import ConfigSaver
 from fastNLP.io.config_io import ConfigSection, ConfigLoader, ConfigSaver


 class TestConfigSaver(unittest.TestCase):