@@ -2,6 +2,9 @@ | |||||
[](https://travis-ci.org/fastnlp/fastNLP)
[](https://codecov.io/gh/fastnlp/fastNLP)
[](https://badge.fury.io/py/fastNLP)
[](http://fastnlp.readthedocs.io/?badge=latest)
fastNLP is a modular Natural Language Processing system based on PyTorch, built for fast development of NLP tools. It decomposes deep-learning NLP models into modules that fall into four categories: encoder, interaction, aggregation and decoder, each category containing several ready-made implementations. Encoder modules encode the input into an abstract representation, interaction modules let the information in that representation interact with each other, aggregation modules aggregate and reduce information, and decoder modules decode the representation into the output. Most current NLP models can be built from these modules, which greatly simplifies developing new NLP models. The architecture of fastNLP is shown in the figure below:
@@ -30,6 +33,7 @@ A typical fastNLP routine is composed of four phases: loading dataset, pre-proce | |||||
from fastNLP.models.base_model import BaseModel | from fastNLP.models.base_model import BaseModel | ||||
from fastNLP.modules import encoder | from fastNLP.modules import encoder | ||||
from fastNLP.modules import aggregation | from fastNLP.modules import aggregation | ||||
from fastNLP.modules import decoder | |||||
from fastNLP.loader.dataset_loader import ClassDatasetLoader | from fastNLP.loader.dataset_loader import ClassDatasetLoader | ||||
from fastNLP.loader.preprocess import ClassPreprocess | from fastNLP.loader.preprocess import ClassPreprocess | ||||
@@ -42,20 +46,20 @@ class ClassificationModel(BaseModel): | |||||
Simple text classification model based on CNN. | Simple text classification model based on CNN. | ||||
""" | """ | ||||
def __init__(self, class_num, vocab_size): | |||||
def __init__(self, num_classes, vocab_size): | |||||
super(ClassificationModel, self).__init__() | super(ClassificationModel, self).__init__() | ||||
self.embed = encoder.Embedding(nums=vocab_size, dims=300) | |||||
self.conv = encoder.Conv( | |||||
self.emb = encoder.Embedding(nums=vocab_size, dims=300) | |||||
self.enc = encoder.Conv( | |||||
in_channels=300, out_channels=100, kernel_size=3) | in_channels=300, out_channels=100, kernel_size=3) | ||||
self.pool = aggregation.MaxPool() | |||||
self.output = encoder.Linear(input_size=100, output_size=class_num) | |||||
self.agg = aggregation.MaxPool() | |||||
self.dec = decoder.MLP([100, num_classes], num_class=num_classes)  # MLP takes a list of layer sizes
def forward(self, x): | def forward(self, x): | ||||
x = self.embed(x) # [N,L] -> [N,L,C] | |||||
x = self.conv(x) # [N,L,C_in] -> [N,L,C_out] | |||||
x = self.pool(x) # [N,L,C] -> [N,C] | |||||
x = self.output(x) # [N,C] -> [N, N_class] | |||||
x = self.emb(x) # [N,L] -> [N,L,C] | |||||
x = self.enc(x) # [N,L,C_in] -> [N,L,C_out] | |||||
x = self.agg(x) # [N,L,C] -> [N,C] | |||||
x = self.dec(x) # [N,C] -> [N, N_class] | |||||
return x | return x | ||||
@@ -75,7 +79,7 @@ model_args = { | |||||
'num_classes': n_classes, | 'num_classes': n_classes, | ||||
'vocab_size': vocab_size | 'vocab_size': vocab_size | ||||
} | } | ||||
model = ClassificationModel(class_num=n_classes, vocab_size=vocab_size) | |||||
model = ClassificationModel(num_classes=n_classes, vocab_size=vocab_size) | |||||
# train model | # train model | ||||
train_args = { | train_args = { | ||||
@@ -0,0 +1,27 @@ | |||||
import torch | |||||
class Loss(object): | |||||
"""Loss function of the algorithm, | |||||
either the wrapper of a loss function from framework, or a user-defined loss (need pytorch auto_grad support) | |||||
""" | |||||
def __init__(self, args): | |||||
if args is None: | |||||
# None means no loss is given here; the Trainer may fall back to the model's own loss
self._loss = None | |||||
elif isinstance(args, str): | |||||
self._loss = self._borrow_from_pytorch(args) | |||||
else: | |||||
raise NotImplementedError | |||||
def get(self): | |||||
return self._loss | |||||
@staticmethod | |||||
def _borrow_from_pytorch(loss_name): | |||||
if loss_name == "cross_entropy": | |||||
return torch.nn.CrossEntropyLoss() | |||||
else: | |||||
raise NotImplementedError |
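For reference, a minimal usage sketch of this wrapper; the tensor shapes and values below are illustrative, not part of the library:

import torch
from fastNLP.core.loss import Loss

loss_func = Loss("cross_entropy").get()   # borrows torch.nn.CrossEntropyLoss()
logits = torch.randn(4, 5)                # [batch_size, num_classes]
labels = torch.tensor([0, 2, 1, 4])
print(loss_func(logits, labels))          # scalar loss tensor

print(Loss(None).get())                   # None: the Trainer may fall back to the model's own loss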
@@ -1,3 +1,54 @@ | |||||
""" | |||||
use optimizer from Pytorch | |||||
""" | |||||
import torch | |||||
class Optimizer(object): | |||||
"""Wrapper of optimizer from framework | |||||
Supported optimizer names and their arguments (types):
1. Adam: lr (float), weight_decay (float) | |||||
2. AdaGrad | |||||
3. RMSProp | |||||
4. SGD: lr (float), momentum (float) | |||||
""" | |||||
def __init__(self, optimizer_name, **kwargs): | |||||
""" | |||||
:param optimizer_name: str, the name of the optimizer | |||||
:param kwargs: the arguments | |||||
""" | |||||
self.optim_name = optimizer_name | |||||
self.kwargs = kwargs | |||||
@property | |||||
def name(self): | |||||
return self.optim_name | |||||
@property | |||||
def params(self): | |||||
return self.kwargs | |||||
def construct_from_pytorch(self, model_params): | |||||
"""construct a optimizer from framework over given model parameters""" | |||||
if self.optim_name in ["SGD", "sgd"]: | |||||
if "lr" in self.kwargs: | |||||
if "momentum" not in self.kwargs: | |||||
self.kwargs["momentum"] = 0 | |||||
optimizer = torch.optim.SGD(model_params, lr=self.kwargs["lr"], momentum=self.kwargs["momentum"]) | |||||
else: | |||||
raise ValueError("requires learning rate for SGD optimizer") | |||||
elif self.optim_name in ["adam", "Adam"]: | |||||
if "lr" in self.kwargs: | |||||
if "weight_decay" not in self.kwargs: | |||||
self.kwargs["weight_decay"] = 0 | |||||
optimizer = torch.optim.Adam(model_params, lr=self.kwargs["lr"], | |||||
weight_decay=self.kwargs["weight_decay"]) | |||||
else: | |||||
raise ValueError("requires learning rate for Adam optimizer") | |||||
else: | |||||
raise NotImplementedError | |||||
return optimizer |
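A minimal sketch of the intended two-step usage; the model below is a stand-in torch module, and only SGD and Adam are handled above:

import torch.nn as nn
from fastNLP.core.optimizer import Optimizer

model = nn.Linear(10, 2)                             # placeholder model
proto = Optimizer("SGD", lr=0.01)                    # momentum defaults to 0 when omitted
optimizer = proto.construct_from_pytorch(model.parameters())
# Optimizer("Adam", lr=0.001) works the same way; weight_decay defaults to 0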
@@ -19,13 +19,13 @@ DEFAULT_WORD_TO_INDEX = {DEFAULT_PADDING_LABEL: 0, DEFAULT_UNKNOWN_LABEL: 1, | |||||
def save_pickle(obj, pickle_path, file_name): | def save_pickle(obj, pickle_path, file_name): | ||||
with open(os.path.join(pickle_path, file_name), "wb") as f: | with open(os.path.join(pickle_path, file_name), "wb") as f: | ||||
_pickle.dump(obj, f) | _pickle.dump(obj, f) | ||||
print("{} saved. ".format(file_name)) | |||||
print("{} saved in {}".format(file_name, pickle_path)) | |||||
def load_pickle(pickle_path, file_name): | def load_pickle(pickle_path, file_name): | ||||
with open(os.path.join(pickle_path, file_name), "rb") as f: | with open(os.path.join(pickle_path, file_name), "rb") as f: | ||||
obj = _pickle.load(f) | obj = _pickle.load(f) | ||||
print("{} loaded. ".format(file_name)) | |||||
print("{} loaded from {}".format(file_name, pickle_path)) | |||||
return obj | return obj | ||||
@@ -59,7 +59,6 @@ class BasePreprocess(object): | |||||
def run(self, train_dev_data, test_data=None, pickle_path="./", train_dev_split=0, cross_val=False, n_fold=10): | def run(self, train_dev_data, test_data=None, pickle_path="./", train_dev_split=0, cross_val=False, n_fold=10): | ||||
"""Main preprocessing pipeline. | """Main preprocessing pipeline. | ||||
:param train_dev_data: three-level list, with either single label or multiple labels in a sample. | :param train_dev_data: three-level list, with either single label or multiple labels in a sample. | ||||
:param test_data: three-level list, with either single label or multiple labels in a sample. (optional) | :param test_data: three-level list, with either single label or multiple labels in a sample. (optional) | ||||
:param pickle_path: str, the path to save the pickle files. | :param pickle_path: str, the path to save the pickle files. | ||||
@@ -98,6 +97,8 @@ class BasePreprocess(object): | |||||
save_pickle(data_train, pickle_path, "data_train.pkl") | save_pickle(data_train, pickle_path, "data_train.pkl") | ||||
else: | else: | ||||
data_train = load_pickle(pickle_path, "data_train.pkl") | data_train = load_pickle(pickle_path, "data_train.pkl") | ||||
if pickle_exist(pickle_path, "data_dev.pkl"): | |||||
data_dev = load_pickle(pickle_path, "data_dev.pkl") | |||||
else: | else: | ||||
# cross_val is True | # cross_val is True | ||||
if not pickle_exist(pickle_path, "data_train_0.pkl"): | if not pickle_exist(pickle_path, "data_train_0.pkl"): | ||||
@@ -1,5 +1,3 @@ | |||||
import _pickle | |||||
import numpy as np | import numpy as np | ||||
import torch | import torch | ||||
@@ -14,43 +12,78 @@ logger = create_logger(__name__, "./train_test.log") | |||||
class BaseTester(object): | class BaseTester(object): | ||||
"""An collection of model inference and evaluation of performance, used over validation/dev set and test set. """ | """An collection of model inference and evaluation of performance, used over validation/dev set and test set. """ | ||||
def __init__(self, test_args): | |||||
def __init__(self, **kwargs): | |||||
""" | """ | ||||
:param test_args: a dict-like object that has __getitem__ method, can be accessed by "test_args["key_str"]" | |||||
:param kwargs: keyword arguments that override the default settings; see "default_args" below for the supported keys
""" | """ | ||||
super(BaseTester, self).__init__() | super(BaseTester, self).__init__() | ||||
self.validate_in_training = test_args["validate_in_training"] | |||||
self.save_dev_data = None | |||||
self.save_output = test_args["save_output"] | |||||
self.output = None | |||||
self.save_loss = test_args["save_loss"] | |||||
self.mean_loss = None | |||||
self.batch_size = test_args["batch_size"] | |||||
self.pickle_path = test_args["pickle_path"] | |||||
self.iterator = None | |||||
self.use_cuda = test_args["use_cuda"] | |||||
self.model = None | |||||
""" | |||||
"default_args" provides default value for important settings. | |||||
The initialization arguments "kwargs" with the same key (name) will override the default value. | |||||
"kwargs" must have the same type as "default_args" on corresponding keys. | |||||
Otherwise, error will raise. | |||||
""" | |||||
default_args = {"save_output": False, # collect outputs of validation set | |||||
"save_loss": False, # collect losses in validation | |||||
"save_best_dev": False, # save best model during validation | |||||
"batch_size": 8, | |||||
"use_cuda": True, | |||||
"pickle_path": "./save/", | |||||
"model_name": "dev_best_model.pkl", | |||||
"print_every_step": 1, | |||||
} | |||||
""" | |||||
"required_args" is the collection of arguments that users must pass to Trainer explicitly. | |||||
This is used to warn users of essential settings in the training. | |||||
Obviously, "required_args" is the subset of "default_args". | |||||
The value in "default_args" to the keys in "required_args" is simply for type check. | |||||
""" | |||||
# TODO: required arguments | |||||
required_args = {} | |||||
for req_key in required_args: | |||||
if req_key not in kwargs: | |||||
logger.error("Tester lacks argument {}".format(req_key)) | |||||
raise ValueError("Tester lacks argument {}".format(req_key)) | |||||
for key in default_args: | |||||
if key in kwargs: | |||||
if isinstance(kwargs[key], type(default_args[key])): | |||||
default_args[key] = kwargs[key] | |||||
else: | |||||
msg = "Argument %s type mismatch: expected %s while get %s" % ( | |||||
key, type(default_args[key]), type(kwargs[key])) | |||||
logger.error(msg) | |||||
raise ValueError(msg) | |||||
else: | |||||
# BaseTester doesn't care about extra arguments
pass | |||||
print(default_args) | |||||
self.save_output = default_args["save_output"] | |||||
self.save_best_dev = default_args["save_best_dev"] | |||||
self.save_loss = default_args["save_loss"] | |||||
self.batch_size = default_args["batch_size"] | |||||
self.pickle_path = default_args["pickle_path"] | |||||
self.use_cuda = default_args["use_cuda"] | |||||
self.print_every_step = default_args["print_every_step"] | |||||
self._model = None | |||||
self.eval_history = [] | self.eval_history = [] | ||||
self.batch_output = [] | self.batch_output = [] | ||||
def test(self, network, dev_data): | def test(self, network, dev_data): | ||||
if torch.cuda.is_available() and self.use_cuda: | if torch.cuda.is_available() and self.use_cuda: | ||||
self.model = network.cuda() | |||||
self._model = network.cuda() | |||||
else: | else: | ||||
self.model = network | |||||
self._model = network | |||||
# turn on the testing mode; clean up the history | # turn on the testing mode; clean up the history | ||||
self.mode(network, test=True) | self.mode(network, test=True) | ||||
self.eval_history.clear() | self.eval_history.clear() | ||||
self.batch_output.clear() | self.batch_output.clear() | ||||
# dev_data = self.prepare_input(self.pickle_path) | |||||
# logger.info("validation data loaded") | |||||
iterator = iter(Batchifier(RandomSampler(dev_data), self.batch_size, drop_last=True)) | iterator = iter(Batchifier(RandomSampler(dev_data), self.batch_size, drop_last=True)) | ||||
n_batches = len(dev_data) // self.batch_size | |||||
print_every_step = 1 | |||||
step = 0 | step = 0 | ||||
for batch_x, batch_y in self.make_batch(iterator, dev_data): | for batch_x, batch_y in self.make_batch(iterator, dev_data): | ||||
@@ -65,21 +98,10 @@ class BaseTester(object): | |||||
print_output = "[test step {}] {}".format(step, eval_results) | print_output = "[test step {}] {}".format(step, eval_results) | ||||
logger.info(print_output) | logger.info(print_output) | ||||
if step % print_every_step == 0: | |||||
if self.print_every_step > 0 and step % self.print_every_step == 0: | |||||
print(print_output) | print(print_output) | ||||
step += 1 | step += 1 | ||||
def prepare_input(self, data_path): | |||||
"""Save the dev data once it is loaded. Can return directly next time. | |||||
:param data_path: str, the path to the pickle data for dev | |||||
:return save_dev_data: list. Each entry is a sample, which is also a list of features and label(s). | |||||
""" | |||||
if self.save_dev_data is None: | |||||
data_dev = _pickle.load(open(data_path + "data_dev.pkl", "rb")) | |||||
self.save_dev_data = data_dev | |||||
return self.save_dev_data | |||||
def mode(self, model, test): | def mode(self, model, test): | ||||
"""Train mode or Test mode. This is for PyTorch currently. | """Train mode or Test mode. This is for PyTorch currently. | ||||
@@ -117,15 +139,14 @@ class SeqLabelTester(BaseTester): | |||||
Tester for sequence labeling. | Tester for sequence labeling. | ||||
""" | """ | ||||
def __init__(self, test_args): | |||||
def __init__(self, **test_args): | |||||
""" | """ | ||||
:param test_args: a dict-like object that has __getitem__ method, can be accessed by "test_args["key_str"]" | :param test_args: a dict-like object that has __getitem__ method, can be accessed by "test_args["key_str"]" | ||||
""" | """ | ||||
super(SeqLabelTester, self).__init__(test_args) | |||||
super(SeqLabelTester, self).__init__(**test_args) | |||||
self.max_len = None | self.max_len = None | ||||
self.mask = None | self.mask = None | ||||
self.seq_len = None | self.seq_len = None | ||||
self.batch_result = None | |||||
def data_forward(self, network, inputs): | def data_forward(self, network, inputs): | ||||
"""This is only for sequence labeling with CRF decoder. | """This is only for sequence labeling with CRF decoder. | ||||
@@ -159,14 +180,14 @@ class SeqLabelTester(BaseTester): | |||||
:return: | :return: | ||||
""" | """ | ||||
batch_size, max_len = predict.size(0), predict.size(1) | batch_size, max_len = predict.size(0), predict.size(1) | ||||
loss = self.model.loss(predict, truth, self.mask) / batch_size | |||||
loss = self._model.loss(predict, truth, self.mask) / batch_size | |||||
prediction = self.model.prediction(predict, self.mask) | |||||
results = torch.Tensor(prediction).view(-1,) | |||||
prediction = self._model.prediction(predict, self.mask) | |||||
results = torch.Tensor(prediction).view(-1, ) | |||||
# make sure "results" is in the same device as "truth" | # make sure "results" is in the same device as "truth" | ||||
results = results.to(truth) | results = results.to(truth) | ||||
accuracy = torch.sum(results == truth.view((-1,))).to(torch.float) / results.shape[0] | accuracy = torch.sum(results == truth.view((-1,))).to(torch.float) / results.shape[0] | ||||
return [loss.data, accuracy.data] | |||||
return [float(loss), float(accuracy)] | |||||
def metrics(self): | def metrics(self): | ||||
batch_loss = np.mean([x[0] for x in self.eval_history]) | batch_loss = np.mean([x[0] for x in self.eval_history]) | ||||
@@ -184,21 +205,16 @@ class SeqLabelTester(BaseTester): | |||||
def make_batch(self, iterator, data): | def make_batch(self, iterator, data): | ||||
return Action.make_batch(iterator, use_cuda=self.use_cuda, output_length=True) | return Action.make_batch(iterator, use_cuda=self.use_cuda, output_length=True) | ||||
class ClassificationTester(BaseTester): | class ClassificationTester(BaseTester): | ||||
"""Tester for classification.""" | """Tester for classification.""" | ||||
def __init__(self, test_args): | |||||
def __init__(self, **test_args): | |||||
""" | """ | ||||
:param test_args: a dict-like object that has __getitem__ method, \ | :param test_args: a dict-like object that has __getitem__ method, \ | ||||
can be accessed by "test_args["key_str"]" | can be accessed by "test_args["key_str"]" | ||||
""" | """ | ||||
super(ClassificationTester, self).__init__(test_args) | |||||
self.pickle_path = test_args["pickle_path"] | |||||
self.save_dev_data = None | |||||
self.output = None | |||||
self.mean_loss = None | |||||
self.iterator = None | |||||
super(ClassificationTester, self).__init__(**test_args) | |||||
def make_batch(self, iterator, data, max_len=None): | def make_batch(self, iterator, data, max_len=None): | ||||
return Action.make_batch(iterator, use_cuda=self.use_cuda, max_len=max_len) | return Action.make_batch(iterator, use_cuda=self.use_cuda, max_len=max_len) | ||||
@@ -221,4 +237,3 @@ class ClassificationTester(BaseTester): | |||||
y_true = torch.cat(y_true, dim=0) | y_true = torch.cat(y_true, dim=0) | ||||
acc = float(torch.sum(y_pred == y_true)) / len(y_true) | acc = float(torch.sum(y_pred == y_true)) / len(y_true) | ||||
return y_true.cpu().numpy(), y_prob.cpu().numpy(), acc | return y_true.cpu().numpy(), y_prob.cpu().numpy(), acc | ||||
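A hedged sketch of the new keyword-argument interface described by "default_args"/"required_args" above; the values are illustrative and model/dev_data are assumed to exist:

from fastNLP.core.tester import ClassificationTester

tester = ClassificationTester(batch_size=16,        # overrides the default of 8
                              use_cuda=False,
                              save_output=True,
                              pickle_path="./save/")
# wrongly-typed values raise ValueError; unknown keys are silently ignored
# tester.test(model, dev_data) then evaluates the model over dev_data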
@@ -4,12 +4,12 @@ import os | |||||
import time | import time | ||||
from datetime import timedelta | from datetime import timedelta | ||||
import numpy as np | |||||
import torch | import torch | ||||
import torch.nn as nn | |||||
from fastNLP.core.action import Action | from fastNLP.core.action import Action | ||||
from fastNLP.core.action import RandomSampler, Batchifier | from fastNLP.core.action import RandomSampler, Batchifier | ||||
from fastNLP.core.loss import Loss | |||||
from fastNLP.core.optimizer import Optimizer | |||||
from fastNLP.core.tester import SeqLabelTester, ClassificationTester | from fastNLP.core.tester import SeqLabelTester, ClassificationTester | ||||
from fastNLP.modules import utils | from fastNLP.modules import utils | ||||
from fastNLP.saver.logger import create_logger | from fastNLP.saver.logger import create_logger | ||||
@@ -23,14 +23,13 @@ class BaseTrainer(object): | |||||
"""Operations to train a model, including data loading, SGD, and validation. | """Operations to train a model, including data loading, SGD, and validation. | ||||
Subclasses must implement the following abstract methods: | Subclasses must implement the following abstract methods: | ||||
- define_optimizer | |||||
- grad_backward | - grad_backward | ||||
- get_loss | - get_loss | ||||
""" | """ | ||||
def __init__(self, train_args): | |||||
def __init__(self, **kwargs): | |||||
""" | """ | ||||
:param train_args: dict of (key, value), or dict-like object. key is str. | |||||
:param kwargs: training settings passed as keyword arguments; keys are str.
The base trainer requires the following keys: | The base trainer requires the following keys: | ||||
- epochs: int, the number of epochs in training | - epochs: int, the number of epochs in training | ||||
@@ -39,64 +38,90 @@ class BaseTrainer(object): | |||||
- pickle_path: str, the path to pickle files for pre-processing | - pickle_path: str, the path to pickle files for pre-processing | ||||
""" | """ | ||||
super(BaseTrainer, self).__init__() | super(BaseTrainer, self).__init__() | ||||
self.n_epochs = train_args["epochs"] | |||||
self.batch_size = train_args["batch_size"] | |||||
self.pickle_path = train_args["pickle_path"] | |||||
self.validate = train_args["validate"] | |||||
self.save_best_dev = train_args["save_best_dev"] | |||||
self.model_saved_path = train_args["model_saved_path"] | |||||
self.use_cuda = train_args["use_cuda"] | |||||
self.model = None | |||||
self.iterator = None | |||||
self.loss_func = None | |||||
self.optimizer = None | |||||
""" | |||||
"default_args" provides default value for important settings. | |||||
The initialization arguments "kwargs" with the same key (name) will override the default value. | |||||
"kwargs" must have the same type as "default_args" on corresponding keys. | |||||
Otherwise, error will raise. | |||||
""" | |||||
default_args = {"epochs": 3, "batch_size": 8, "validate": True, "use_cuda": True, "pickle_path": "./save/", | |||||
"save_best_dev": True, "model_name": "default_model_name.pkl", "print_every_step": 1, | |||||
"loss": Loss(None), | |||||
"optimizer": Optimizer("Adam", lr=0.001, weight_decay=0) | |||||
} | |||||
""" | |||||
"required_args" is the collection of arguments that users must pass to Trainer explicitly. | |||||
This is used to warn users of essential settings in the training. | |||||
Obviously, "required_args" is the subset of "default_args". | |||||
The value in "default_args" to the keys in "required_args" is simply for type check. | |||||
""" | |||||
# TODO: required arguments | |||||
required_args = {} | |||||
for req_key in required_args: | |||||
if req_key not in kwargs: | |||||
logger.error("Trainer lacks argument {}".format(req_key)) | |||||
raise ValueError("Trainer lacks argument {}".format(req_key)) | |||||
for key in default_args: | |||||
if key in kwargs: | |||||
if isinstance(kwargs[key], type(default_args[key])): | |||||
default_args[key] = kwargs[key] | |||||
else: | |||||
msg = "Argument %s type mismatch: expected %s while get %s" % ( | |||||
key, type(default_args[key]), type(kwargs[key])) | |||||
logger.error(msg) | |||||
raise ValueError(msg) | |||||
else: | |||||
# BaseTrainer doesn't care about extra arguments | |||||
pass | |||||
print(default_args) | |||||
self.n_epochs = default_args["epochs"] | |||||
self.batch_size = default_args["batch_size"] | |||||
self.pickle_path = default_args["pickle_path"] | |||||
self.validate = default_args["validate"] | |||||
self.save_best_dev = default_args["save_best_dev"] | |||||
self.use_cuda = default_args["use_cuda"] | |||||
self.model_name = default_args["model_name"] | |||||
self.print_every_step = default_args["print_every_step"] | |||||
self._model = None | |||||
self._loss_func = default_args["loss"].get() # return a pytorch loss function or None | |||||
self._optimizer = None | |||||
self._optimizer_proto = default_args["optimizer"] | |||||
def train(self, network, train_data, dev_data=None): | def train(self, network, train_data, dev_data=None): | ||||
"""General Training Steps | |||||
"""General Training Procedure | |||||
:param network: a model | :param network: a model | ||||
:param train_data: three-level list, the training set. | :param train_data: three-level list, the training set. | ||||
:param dev_data: three-level list, the validation data (optional) | :param dev_data: three-level list, the validation data (optional) | ||||
The method is framework independent. | |||||
Work by calling the following methods: | |||||
- prepare_input | |||||
- mode | |||||
- define_optimizer | |||||
- data_forward | |||||
- get_loss | |||||
- grad_backward | |||||
- update | |||||
Subclasses must implement these methods with a specific framework. | |||||
""" | """ | ||||
# prepare model and data, transfer model to gpu if available | |||||
# transfer model to gpu if available | |||||
if torch.cuda.is_available() and self.use_cuda: | if torch.cuda.is_available() and self.use_cuda: | ||||
self.model = network.cuda() | |||||
self._model = network.cuda() | |||||
# self._model is used to access model-specific loss | |||||
else: | else: | ||||
self.model = network | |||||
self._model = network | |||||
# train_data = self.load_train_data(self.pickle_path) | |||||
# logger.info("training data loaded") | |||||
# define tester over dev data | |||||
# define Tester over dev data | |||||
if self.validate: | if self.validate: | ||||
default_valid_args = {"save_output": True, "validate_in_training": True, "save_dev_input": True, | default_valid_args = {"save_output": True, "validate_in_training": True, "save_dev_input": True, | ||||
"save_loss": True, "batch_size": self.batch_size, "pickle_path": self.pickle_path, | "save_loss": True, "batch_size": self.batch_size, "pickle_path": self.pickle_path, | ||||
"use_cuda": self.use_cuda} | |||||
"use_cuda": self.use_cuda, "print_every_step": 0} | |||||
validator = self._create_validator(default_valid_args) | validator = self._create_validator(default_valid_args) | ||||
logger.info("validator defined as {}".format(str(validator))) | logger.info("validator defined as {}".format(str(validator))) | ||||
# optimizer and loss | |||||
self.define_optimizer() | self.define_optimizer() | ||||
logger.info("optimizer defined as {}".format(str(self.optimizer))) | |||||
logger.info("optimizer defined as {}".format(str(self._optimizer))) | |||||
self.define_loss() | |||||
logger.info("loss function defined as {}".format(str(self._loss_func))) | |||||
# main training epochs | |||||
n_samples = len(train_data) | |||||
n_batches = n_samples // self.batch_size | |||||
n_print = 1 | |||||
# main training procedure | |||||
start = time.time() | start = time.time() | ||||
logger.info("training epochs started") | logger.info("training epochs started") | ||||
for epoch in range(1, self.n_epochs + 1): | for epoch in range(1, self.n_epochs + 1): | ||||
logger.info("training epoch {}".format(epoch)) | logger.info("training epoch {}".format(epoch)) | ||||
@@ -106,23 +131,30 @@ class BaseTrainer(object): | |||||
data_iterator = iter(Batchifier(RandomSampler(train_data), self.batch_size, drop_last=False)) | data_iterator = iter(Batchifier(RandomSampler(train_data), self.batch_size, drop_last=False)) | ||||
logger.info("prepared data iterator") | logger.info("prepared data iterator") | ||||
self._train_step(data_iterator, network, start=start, n_print=n_print, epoch=epoch) | |||||
# one forward and backward pass | |||||
self._train_step(data_iterator, network, start=start, n_print=self.print_every_step, epoch=epoch) | |||||
# validation | |||||
if self.validate: | if self.validate: | ||||
logger.info("validation started") | logger.info("validation started") | ||||
validator.test(network, dev_data) | validator.test(network, dev_data) | ||||
if self.save_best_dev and self.best_eval_result(validator): | if self.save_best_dev and self.best_eval_result(validator): | ||||
self.save_model(network) | |||||
print("saved better model selected by dev") | |||||
logger.info("saved better model selected by dev") | |||||
self.save_model(network, self.model_name) | |||||
print("Saved better model selected by validation.") | |||||
logger.info("Saved better model selected by validation.") | |||||
valid_results = validator.show_matrices() | valid_results = validator.show_matrices() | ||||
print("[epoch {}] {}".format(epoch, valid_results)) | print("[epoch {}] {}".format(epoch, valid_results)) | ||||
logger.info("[epoch {}] {}".format(epoch, valid_results)) | logger.info("[epoch {}] {}".format(epoch, valid_results)) | ||||
def _train_step(self, data_iterator, network, **kwargs): | def _train_step(self, data_iterator, network, **kwargs): | ||||
"""Training process in one epoch.""" | |||||
"""Training process in one epoch. | |||||
kwargs should contain: | |||||
- n_print: int, print training information every n steps. | |||||
- start: time.time(), the time when training started.
- epoch: int, the current epoch number.
""" | |||||
step = 0 | step = 0 | ||||
for batch_x, batch_y in self.make_batch(data_iterator): | for batch_x, batch_y in self.make_batch(data_iterator): | ||||
@@ -132,7 +164,7 @@ class BaseTrainer(object): | |||||
self.grad_backward(loss) | self.grad_backward(loss) | ||||
self.update() | self.update() | ||||
if step % kwargs["n_print"] == 0: | |||||
if kwargs["n_print"] > 0 and step % kwargs["n_print"] == 0: | |||||
end = time.time() | end = time.time() | ||||
diff = timedelta(seconds=round(end - kwargs["start"])) | diff = timedelta(seconds=round(end - kwargs["start"])) | ||||
print_output = "[epoch: {:>3} step: {:>4}] train loss: {:>4.2} time: {}".format( | print_output = "[epoch: {:>3} step: {:>4}] train loss: {:>4.2} time: {}".format( | ||||
@@ -153,6 +185,11 @@ class BaseTrainer(object): | |||||
logger.error("the number of folds in train and dev data unequals {}!={}".format(len(train_data_cv), | logger.error("the number of folds in train and dev data unequals {}!={}".format(len(train_data_cv), | ||||
len(dev_data_cv))) | len(dev_data_cv))) | ||||
raise RuntimeError("the number of folds in train and dev data unequals") | raise RuntimeError("the number of folds in train and dev data unequals") | ||||
if self.validate is False: | |||||
logger.warn("Cross validation requires self.validate to be True. Please turn it on. ") | |||||
print("[warning] Cross validation requires self.validate to be True. Please turn it on. ") | |||||
self.validate = True | |||||
n_fold = len(train_data_cv) | n_fold = len(train_data_cv) | ||||
logger.info("perform {} folds cross validation.".format(n_fold)) | logger.info("perform {} folds cross validation.".format(n_fold)) | ||||
for i in range(n_fold): | for i in range(n_fold): | ||||
@@ -186,7 +223,7 @@ class BaseTrainer(object): | |||||
""" | """ | ||||
Define framework-specific optimizer specified by the models. | Define framework-specific optimizer specified by the models. | ||||
""" | """ | ||||
raise NotImplementedError | |||||
self._optimizer = self._optimizer_proto.construct_from_pytorch(self._model.parameters()) | |||||
def update(self): | def update(self): | ||||
""" | """ | ||||
@@ -194,7 +231,7 @@ class BaseTrainer(object): | |||||
For PyTorch, just call optimizer to update. | For PyTorch, just call optimizer to update. | ||||
""" | """ | ||||
raise NotImplementedError | |||||
self._optimizer.step() | |||||
def data_forward(self, network, x): | def data_forward(self, network, x): | ||||
raise NotImplementedError | raise NotImplementedError | ||||
@@ -206,7 +243,8 @@ class BaseTrainer(object): | |||||
For PyTorch, just do "loss.backward()" | For PyTorch, just do "loss.backward()" | ||||
""" | """ | ||||
raise NotImplementedError | |||||
self._model.zero_grad() | |||||
loss.backward() | |||||
def get_loss(self, predict, truth): | def get_loss(self, predict, truth): | ||||
""" | """ | ||||
@@ -215,21 +253,25 @@ class BaseTrainer(object): | |||||
:param truth: ground truth label vector | :param truth: ground truth label vector | ||||
:return: a scalar | :return: a scalar | ||||
""" | """ | ||||
if self.loss_func is None: | |||||
if hasattr(self.model, "loss"): | |||||
self.loss_func = self.model.loss | |||||
logger.info("The model has a loss function, use it.") | |||||
else: | |||||
logger.info("The model didn't define loss, use Trainer's loss.") | |||||
self.define_loss() | |||||
return self.loss_func(predict, truth) | |||||
return self._loss_func(predict, truth) | |||||
def define_loss(self): | def define_loss(self): | ||||
""" | """ | ||||
Assign an instance of loss function to self.loss_func | |||||
E.g. self.loss_func = nn.CrossEntropyLoss() | |||||
If the model defines a loss, use the model's loss.
Otherwise, the Trainer must be given a loss argument and uses it as the loss.
The two losses cannot both be defined at the same time;
the Trainer never falls back to a default loss of its own.
""" | """ | ||||
raise NotImplementedError | |||||
if hasattr(self._model, "loss") and self._loss_func is not None: | |||||
raise ValueError("Both the model and Trainer define loss. Please take out your loss.") | |||||
if hasattr(self._model, "loss"): | |||||
self._loss_func = self._model.loss | |||||
logger.info("The model has a loss function, use it.") | |||||
else: | |||||
if self._loss_func is None: | |||||
raise ValueError("Please specify a loss function.") | |||||
logger.info("The model didn't define loss, use Trainer's loss.") | |||||
def best_eval_result(self, validator): | def best_eval_result(self, validator): | ||||
""" | """ | ||||
@@ -238,71 +280,35 @@ class BaseTrainer(object): | |||||
""" | """ | ||||
raise NotImplementedError | raise NotImplementedError | ||||
def save_model(self, network): | |||||
""" | |||||
def save_model(self, network, model_name): | |||||
"""Save this model with such a name. | |||||
This method may be called multiple times by Trainer to overwritten a better model. | |||||
:param network: the PyTorch model | :param network: the PyTorch model | ||||
model_best_dev.pkl may be overwritten by a better model in future epochs. | |||||
:param model_name: str | |||||
""" | """ | ||||
ModelSaver(self.model_saved_path + "model_best_dev.pkl").save_pytorch(network) | |||||
if model_name[-4:] != ".pkl": | |||||
model_name += ".pkl" | |||||
ModelSaver(self.pickle_path + model_name).save_pytorch(network) | |||||
def _create_validator(self, valid_args): | def _create_validator(self, valid_args): | ||||
raise NotImplementedError | raise NotImplementedError | ||||
class ToyTrainer(BaseTrainer): | |||||
""" | |||||
An example to show the definition of Trainer. | |||||
""" | |||||
def __init__(self, training_args): | |||||
super(ToyTrainer, self).__init__(training_args) | |||||
def load_train_data(self, data_path): | |||||
data_train = _pickle.load(open(data_path + "/data_train.pkl", "rb")) | |||||
data_dev = _pickle.load(open(data_path + "/data_train.pkl", "rb")) | |||||
return data_train, data_dev, 0, 1 | |||||
def data_forward(self, network, x): | |||||
return network(x) | |||||
def grad_backward(self, loss): | |||||
self.model.zero_grad() | |||||
loss.backward() | |||||
def get_loss(self, pred, truth): | |||||
return np.mean(np.square(pred - truth)) | |||||
def define_optimizer(self): | |||||
self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01) | |||||
def update(self): | |||||
self.optimizer.step() | |||||
class SeqLabelTrainer(BaseTrainer): | class SeqLabelTrainer(BaseTrainer): | ||||
""" | """ | ||||
Trainer for Sequence Modeling | |||||
Trainer for Sequence Labeling | |||||
""" | """ | ||||
def __init__(self, train_args): | |||||
super(SeqLabelTrainer, self).__init__(train_args) | |||||
self.vocab_size = train_args["vocab_size"] | |||||
self.num_classes = train_args["num_classes"] | |||||
def __init__(self, **kwargs): | |||||
super(SeqLabelTrainer, self).__init__(**kwargs) | |||||
# self.vocab_size = kwargs["vocab_size"] | |||||
# self.num_classes = kwargs["num_classes"] | |||||
self.max_len = None | self.max_len = None | ||||
self.mask = None | self.mask = None | ||||
self.best_accuracy = 0.0 | self.best_accuracy = 0.0 | ||||
def define_optimizer(self): | |||||
self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01, momentum=0.9) | |||||
def grad_backward(self, loss): | |||||
self.model.zero_grad() | |||||
loss.backward() | |||||
def update(self): | |||||
self.optimizer.step() | |||||
def data_forward(self, network, inputs): | def data_forward(self, network, inputs): | ||||
if not isinstance(inputs, tuple): | if not isinstance(inputs, tuple): | ||||
raise RuntimeError("output_length must be true for sequence modeling. Receive {}".format(type(inputs[0]))) | raise RuntimeError("output_length must be true for sequence modeling. Receive {}".format(type(inputs[0]))) | ||||
@@ -330,7 +336,7 @@ class SeqLabelTrainer(BaseTrainer): | |||||
batch_size, max_len = predict.size(0), predict.size(1) | batch_size, max_len = predict.size(0), predict.size(1) | ||||
assert truth.shape == (batch_size, max_len) | assert truth.shape == (batch_size, max_len) | ||||
loss = self.model.loss(predict, truth, self.mask) | |||||
loss = self._model.loss(predict, truth, self.mask) | |||||
return loss | return loss | ||||
def best_eval_result(self, validator): | def best_eval_result(self, validator): | ||||
@@ -345,48 +351,25 @@ class SeqLabelTrainer(BaseTrainer): | |||||
return Action.make_batch(iterator, output_length=True, use_cuda=self.use_cuda) | return Action.make_batch(iterator, output_length=True, use_cuda=self.use_cuda) | ||||
def _create_validator(self, valid_args): | def _create_validator(self, valid_args): | ||||
return SeqLabelTester(valid_args) | |||||
return SeqLabelTester(**valid_args) | |||||
class ClassificationTrainer(BaseTrainer): | class ClassificationTrainer(BaseTrainer): | ||||
"""Trainer for classification.""" | |||||
"""Trainer for text classification.""" | |||||
def __init__(self, train_args): | |||||
super(ClassificationTrainer, self).__init__(train_args) | |||||
self.learn_rate = train_args["learn_rate"] | |||||
self.momentum = train_args["momentum"] | |||||
def __init__(self, **train_args): | |||||
super(ClassificationTrainer, self).__init__(**train_args) | |||||
self.iterator = None | self.iterator = None | ||||
self.loss_func = None | self.loss_func = None | ||||
self.optimizer = None | self.optimizer = None | ||||
self.best_accuracy = 0 | self.best_accuracy = 0 | ||||
def define_loss(self): | |||||
self.loss_func = nn.CrossEntropyLoss() | |||||
def define_optimizer(self): | |||||
""" | |||||
Define framework-specific optimizer specified by the models. | |||||
""" | |||||
self.optimizer = torch.optim.SGD( | |||||
self.model.parameters(), | |||||
lr=self.learn_rate, | |||||
momentum=self.momentum) | |||||
def data_forward(self, network, x): | def data_forward(self, network, x): | ||||
"""Forward through network.""" | """Forward through network.""" | ||||
logits = network(x) | logits = network(x) | ||||
return logits | return logits | ||||
def grad_backward(self, loss): | |||||
"""Compute gradient backward.""" | |||||
self.model.zero_grad() | |||||
loss.backward() | |||||
def update(self): | |||||
"""Apply gradient.""" | |||||
self.optimizer.step() | |||||
def make_batch(self, iterator): | def make_batch(self, iterator): | ||||
return Action.make_batch(iterator, output_length=False, use_cuda=self.use_cuda) | return Action.make_batch(iterator, output_length=False, use_cuda=self.use_cuda) | ||||
@@ -404,4 +387,4 @@ class ClassificationTrainer(BaseTrainer): | |||||
return False | return False | ||||
def _create_validator(self, valid_args): | def _create_validator(self, valid_args): | ||||
return ClassificationTester(valid_args) | |||||
return ClassificationTester(**valid_args) |
@@ -1,4 +1,5 @@ | |||||
from fastNLP.core.predictor import SeqLabelInfer, ClassificationInfer | from fastNLP.core.predictor import SeqLabelInfer, ClassificationInfer | ||||
from fastNLP.core.preprocess import load_pickle | |||||
from fastNLP.loader.config_loader import ConfigLoader, ConfigSection | from fastNLP.loader.config_loader import ConfigLoader, ConfigSection | ||||
from fastNLP.loader.model_loader import ModelLoader | from fastNLP.loader.model_loader import ModelLoader | ||||
@@ -7,14 +8,13 @@ mapping from model name to [URL, file_name.class_name, model_pickle_name] | |||||
Notice that the class of the model should be in "models" directory. | Notice that the class of the model should be in "models" directory. | ||||
Example: | Example: | ||||
"zh_pos_tag_model": ["www.fudan.edu.cn", "sequence_modeling.SeqLabeling", "saved_model.pkl"] | |||||
""" | |||||
FastNLP_MODEL_COLLECTION = { | |||||
"seq_label_model": { | "seq_label_model": { | ||||
"url": "www.fudan.edu.cn", | "url": "www.fudan.edu.cn", | ||||
"class": "sequence_modeling.SeqLabeling", | |||||
"class": "sequence_modeling.SeqLabeling", # file_name.class_name in models/ | |||||
"pickle": "seq_label_model.pkl", | "pickle": "seq_label_model.pkl", | ||||
"type": "seq_label" | |||||
"type": "seq_label", | |||||
"config_file_name": "config", # the name of the config file which stores model initialization parameters | |||||
"config_section_name": "text_class_model" # the name of the section in the config file which stores model init params | |||||
}, | }, | ||||
"text_class_model": { | "text_class_model": { | ||||
"url": "www.fudan.edu.cn", | "url": "www.fudan.edu.cn", | ||||
@@ -22,11 +22,18 @@ FastNLP_MODEL_COLLECTION = { | |||||
"pickle": "text_class_model.pkl", | "pickle": "text_class_model.pkl", | ||||
"type": "text_class" | "type": "text_class" | ||||
} | } | ||||
""" | |||||
FastNLP_MODEL_COLLECTION = { | |||||
"cws_basic_model": { | |||||
"url": "", | |||||
"class": "sequence_modeling.AdvSeqLabel", | |||||
"pickle": "cws_basic_model_v_0.pkl", | |||||
"type": "seq_label", | |||||
"config_file_name": "config", | |||||
"config_section_name": "text_class_model" | |||||
} | |||||
} | } | ||||
CONFIG_FILE_NAME = "config" | |||||
SECTION_NAME = "text_class_model" | |||||
class FastNLP(object): | class FastNLP(object): | ||||
""" | """ | ||||
@@ -51,10 +58,13 @@ class FastNLP(object): | |||||
self.model = None | self.model = None | ||||
self.infer_type = None # "seq_label"/"text_class" | self.infer_type = None # "seq_label"/"text_class" | ||||
def load(self, model_name): | |||||
def load(self, model_name, config_file="config", section_name="model"): | |||||
""" | """ | ||||
Load a pre-trained FastNLP model together with additional data. | Load a pre-trained FastNLP model together with additional data. | ||||
:param model_name: str, the name of a FastNLP model. | :param model_name: str, the name of a FastNLP model. | ||||
:param config_file: str, the name of the config file which stores the initialization information of the model. | |||||
(default: "config") | |||||
:param section_name: str, the name of the corresponding section in the config file. (default: model) | |||||
""" | """ | ||||
assert type(model_name) is str | assert type(model_name) is str | ||||
if model_name not in FastNLP_MODEL_COLLECTION: | if model_name not in FastNLP_MODEL_COLLECTION: | ||||
@@ -64,37 +74,47 @@ class FastNLP(object): | |||||
self._download(model_name, FastNLP_MODEL_COLLECTION[model_name]["url"]) | self._download(model_name, FastNLP_MODEL_COLLECTION[model_name]["url"]) | ||||
model_class = self._get_model_class(FastNLP_MODEL_COLLECTION[model_name]["class"]) | model_class = self._get_model_class(FastNLP_MODEL_COLLECTION[model_name]["class"]) | ||||
print("Restore model class {}".format(str(model_class))) | |||||
model_args = ConfigSection() | model_args = ConfigSection() | ||||
ConfigLoader.load_config(self.model_dir + CONFIG_FILE_NAME, {SECTION_NAME: model_args}) | |||||
ConfigLoader.load_config(self.model_dir + config_file, {section_name: model_args}) | |||||
print("Restore model hyper-parameters {}".format(str(model_args.data))) | |||||
# fetch dictionary size and number of labels from pickle files | |||||
word2index = load_pickle(self.model_dir, "word2id.pkl") | |||||
model_args["vocab_size"] = len(word2index) | |||||
index2label = load_pickle(self.model_dir, "id2class.pkl") | |||||
model_args["num_classes"] = len(index2label) | |||||
# Construct the model | # Construct the model | ||||
model = model_class(model_args) | model = model_class(model_args) | ||||
print("Model constructed.") | |||||
# To do: framework independent | # To do: framework independent | ||||
ModelLoader.load_pytorch(model, self.model_dir + FastNLP_MODEL_COLLECTION[model_name]["pickle"]) | ModelLoader.load_pytorch(model, self.model_dir + FastNLP_MODEL_COLLECTION[model_name]["pickle"]) | ||||
print("Model weights loaded.") | |||||
self.model = model | self.model = model | ||||
self.infer_type = FastNLP_MODEL_COLLECTION[model_name]["type"] | self.infer_type = FastNLP_MODEL_COLLECTION[model_name]["type"] | ||||
print("Model loaded. ") | |||||
print("Inference ready.") | |||||
def run(self, raw_input): | def run(self, raw_input): | ||||
""" | """ | ||||
Perform inference over given input using the loaded model. | Perform inference over given input using the loaded model. | ||||
:param raw_input: str, raw text | |||||
:param raw_input: list of strings. Each element is an input query.
:return results: | :return results: | ||||
""" | """ | ||||
infer = self._create_inference(self.model_dir) | infer = self._create_inference(self.model_dir) | ||||
# string ---> 2-D list of string | |||||
infer_input = self.string_to_list(raw_input) | |||||
# tokenize: list of string ---> 2-D list of string | |||||
infer_input = self.tokenize(raw_input, language="zh") | |||||
# 2-D list of string ---> list of strings | |||||
# 2-D list of string ---> 2-D list of tags | |||||
results = infer.predict(self.model, infer_input) | results = infer.predict(self.model, infer_input) | ||||
# list of strings ---> final answers | |||||
# 2-D list of tags ---> list of final answers | |||||
outputs = self._make_output(results, infer_input) | outputs = self._make_output(results, infer_input) | ||||
return outputs | return outputs | ||||
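An end-to-end sketch of the inference API. The model_dir constructor argument and the word2id.pkl/id2class.pkl files it must contain are inferred from the attributes used above; the directory path and query are illustrative:

from fastNLP.fastnlp import FastNLP

nlp = FastNLP(model_dir="./model_dir/")       # holds the config file, vocab pickles and model weights
nlp.load("cws_basic_model", config_file="config", section_name="text_class_model")
results = nlp.run(["这是一个测试句子"])        # a list of raw queries
print(results)                                 # for a seq_label model: one [(token, tag), ...] list per query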
@@ -142,81 +162,100 @@ class FastNLP(object): | |||||
""" | """ | ||||
return True | return True | ||||
def string_to_list(self, text, delimiter="\n"): | |||||
""" | |||||
This function is used to transform raw input to lists, which is done by DatasetLoader in training. | |||||
Split text string into three-level lists. | |||||
[ | |||||
[word_11, word_12, ...], | |||||
[word_21, word_22, ...], | |||||
... | |||||
] | |||||
:param text: string | |||||
:param delimiter: str, character used to split text into sentences. | |||||
:return data: two-level lists | |||||
def tokenize(self, text, language): | |||||
"""Extract tokens from strings. | |||||
For English, extract words separated by space. | |||||
For Chinese, extract characters. | |||||
TODO: more complex tokenization methods | |||||
:param text: list of string | |||||
:param language: str, one of ('zh', 'en'), Chinese or English. | |||||
:return data: list of list of string, each string is a token. | |||||
""" | """ | ||||
assert language in ("zh", "en") | |||||
data = [] | data = [] | ||||
sents = text.strip().split(delimiter) | |||||
for sent in sents: | |||||
characters = [] | |||||
for ch in sent: | |||||
characters.append(ch) | |||||
data.append(characters) | |||||
for sent in text: | |||||
if language == "en": | |||||
tokens = sent.strip().split() | |||||
elif language == "zh": | |||||
tokens = [char for char in sent] | |||||
else: | |||||
raise RuntimeError("Unknown language {}".format(language)) | |||||
data.append(tokens) | |||||
return data | return data | ||||
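# Illustrative examples of the two tokenization modes (outputs shown as comments):
#   tokenize(["machine learning is fun"], language="en") -> [["machine", "learning", "is", "fun"]]
#   tokenize(["深度学习"], language="zh") -> [["深", "度", "学", "习"]]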
def _make_output(self, results, infer_input): | def _make_output(self, results, infer_input): | ||||
"""Transform the infer output into user-friendly output. | |||||
:param results: 1 or 2-D list of strings. | |||||
If self.infer_type == "seq_label", it is of shape [num_examples, tag_seq_length] | |||||
If self.infer_type == "text_class", it is of shape [num_examples] | |||||
:param infer_input: 2-D list of string, the input query before inference. | |||||
:return outputs: list. Each entry is a prediction. | |||||
""" | |||||
if self.infer_type == "seq_label": | if self.infer_type == "seq_label": | ||||
outputs = make_seq_label_output(results, infer_input) | outputs = make_seq_label_output(results, infer_input) | ||||
elif self.infer_type == "text_class": | elif self.infer_type == "text_class": | ||||
outputs = make_class_output(results, infer_input) | outputs = make_class_output(results, infer_input) | ||||
else: | else: | ||||
raise ValueError("fail to make outputs with infer type {}".format(self.infer_type)) | |||||
raise RuntimeError("fail to make outputs with infer type {}".format(self.infer_type)) | |||||
return outputs | return outputs | ||||
def make_seq_label_output(result, infer_input): | def make_seq_label_output(result, infer_input): | ||||
""" | |||||
Transform model output into user-friendly contents. | |||||
:param result: 1-D list of strings. (model output) | |||||
"""Transform model output into user-friendly contents. | |||||
:param result: 2-D list of strings. (model output) | |||||
:param infer_input: 2-D list of string (model input) | :param infer_input: 2-D list of string (model input) | ||||
:return outputs: | |||||
:return ret: list of list of tuples | |||||
[ | |||||
[(word_11, label_11), (word_12, label_12), ...], | |||||
[(word_21, label_21), (word_22, label_22), ...], | |||||
... | |||||
] | |||||
""" | """ | ||||
return result | |||||
ret = [] | |||||
for example_x, example_y in zip(infer_input, result): | |||||
ret.append([(x, y) for x, y in zip(example_x, example_y)]) | |||||
return ret | |||||
def make_class_output(result, infer_input): | def make_class_output(result, infer_input): | ||||
"""Transform model output into user-friendly contents. | |||||
:param result: list of strings, one predicted label per example. (model output)
:param infer_input: 2-D list of string (model input)
:return ret: the same as result, [label_1, label_2, ...] | |||||
""" | |||||
return result | return result | ||||
def interpret_word_seg_results(infer_input, results): | |||||
""" | |||||
Transform model output into user-friendly contents. | |||||
def interpret_word_seg_results(char_seq, label_seq): | |||||
"""Transform model output into user-friendly contents. | |||||
Example: In CWS, convert <BMES> labeling into segmented text. | Example: In CWS, convert <BMES> labeling into segmented text. | ||||
:param results: list of strings. (model output) | |||||
:param infer_input: 2-D list of string (model input) | |||||
:return output: list of strings | |||||
:param char_seq: list of string, | |||||
:param label_seq: list of string, the same length as char_seq | |||||
Each entry is one of ('B', 'M', 'E', 'S'). | |||||
:return output: list of words | |||||
""" | """ | ||||
outputs = [] | |||||
for sent_char, sent_label in zip(infer_input, results): | |||||
words = [] | |||||
word = "" | |||||
for char, label in zip(sent_char, sent_label): | |||||
if label[0] == "B": | |||||
if word != "": | |||||
words.append(word) | |||||
word = char | |||||
elif label[0] == "M": | |||||
word += char | |||||
elif label[0] == "E": | |||||
word += char | |||||
words = [] | |||||
word = "" | |||||
for char, label in zip(char_seq, label_seq): | |||||
if label[0] == "B": | |||||
if word != "": | |||||
words.append(word) | words.append(word) | ||||
word = "" | |||||
elif label[0] == "S": | |||||
if word != "": | |||||
words.append(word) | |||||
word = "" | |||||
words.append(char) | |||||
else: | |||||
raise ValueError("invalid label") | |||||
outputs.append(" ".join(words)) | |||||
return outputs | |||||
word = char | |||||
elif label[0] == "M": | |||||
word += char | |||||
elif label[0] == "E": | |||||
word += char | |||||
words.append(word) | |||||
word = "" | |||||
elif label[0] == "S": | |||||
if word != "": | |||||
words.append(word) | |||||
word = "" | |||||
words.append(char) | |||||
else: | |||||
raise ValueError("invalid label {}".format(label[0])) | |||||
if word != "":
    words.append(word)  # flush a trailing partial word (e.g. a sequence ending with "B" or "M")
return words
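A tiny worked example of the BMES decoding above; input and expected output are illustrative:

chars = ["我", "爱", "自", "然", "语", "言"]
labels = ["S", "S", "B", "M", "M", "E"]
print(interpret_word_seg_results(chars, labels))   # -> ["我", "爱", "自然语言"]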
@@ -94,6 +94,10 @@ class ConfigSection(object): | |||||
def __contains__(self, item): | def __contains__(self, item): | ||||
return item in self.__dict__.keys() | return item in self.__dict__.keys() | ||||
@property | |||||
def data(self): | |||||
return self.__dict__ | |||||
if __name__ == "__main__": | if __name__ == "__main__": | ||||
config = ConfigLoader('configLoader', 'there is no data') | config = ConfigLoader('configLoader', 'there is no data') | ||||
@@ -142,6 +142,8 @@ class CharLM(BaseModel): | |||||
"char_dict": char_dict, | "char_dict": char_dict, | ||||
"reverse_word_dict": reverse_word_dict, | "reverse_word_dict": reverse_word_dict, | ||||
} | } | ||||
if not os.path.exists("cache"): | |||||
os.mkdir("cache") | |||||
torch.save(objects, "cache/prep.pt") | torch.save(objects, "cache/prep.pt") | ||||
print("Preprocess done.") | print("Preprocess done.") | ||||
@@ -0,0 +1,56 @@ | |||||
import torch | |||||
import torch.nn as nn | |||||
class MLP(nn.Module): | |||||
def __init__(self, size_layer, num_class=2, activation='relu'): | |||||
"""Multilayer Perceptrons as a decoder | |||||
Args: | |||||
size_layer: list of int, define the size of MLP layers | |||||
num_class: int, number of output classes; should be 2 or equal to the size of the last layer
activation: str or function, the activation function for hidden layers | |||||
""" | |||||
super(MLP, self).__init__() | |||||
self.hiddens = nn.ModuleList() | |||||
self.output = None | |||||
for i in range(1, len(size_layer)): | |||||
if i + 1 == len(size_layer): | |||||
self.output = nn.Linear(size_layer[i-1], size_layer[i]) | |||||
else: | |||||
self.hiddens.append(nn.Linear(size_layer[i-1], size_layer[i])) | |||||
if num_class == 2: | |||||
self.out_active = nn.LogSigmoid() | |||||
elif num_class == size_layer[-1]: | |||||
self.out_active = nn.LogSoftmax(dim=1) | |||||
else: | |||||
raise ValueError("should set output num_class correctly: {}".format(num_class)) | |||||
actives = { | |||||
'relu': nn.ReLU(), | |||||
'tanh': nn.Tanh() | |||||
} | |||||
if activation in actives: | |||||
self.hidden_active = actives[activation] | |||||
elif callable(activation):
self.hidden_active = activation | |||||
else: | |||||
raise ValueError("should set activation correctly: {}".format(activation)) | |||||
def forward(self, x): | |||||
for layer in self.hiddens: | |||||
x = self.hidden_active(layer(x)) | |||||
x = self.out_active(self.output(x)) | |||||
return x | |||||
if __name__ == '__main__': | |||||
net1 = MLP([5,10,5]) | |||||
net2 = MLP([5,10,5], 5) | |||||
for net in [net1, net2]: | |||||
x = torch.randn(5, 5) | |||||
y = net(x) | |||||
print(x) | |||||
print(y) | |||||
@@ -15,7 +15,7 @@ class Embedding(nn.Module): | |||||
def __init__(self, nums, dims, padding_idx=0, sparse=False, init_emb=None, dropout=0.0): | def __init__(self, nums, dims, padding_idx=0, sparse=False, init_emb=None, dropout=0.0): | ||||
super(Embedding, self).__init__() | super(Embedding, self).__init__() | ||||
self.embed = nn.Embedding(nums, dims, padding_idx, sparse=sparse) | self.embed = nn.Embedding(nums, dims, padding_idx, sparse=sparse) | ||||
if init_emb: | |||||
if init_emb is not None: | |||||
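# explicit "is not None" check: truth-testing a multi-element tensor with "if init_emb:" would be ambiguous and raise an error in PyTorch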
self.embed.weight = nn.Parameter(init_emb) | self.embed.weight = nn.Parameter(init_emb) | ||||
self.dropout = nn.Dropout(dropout) | self.dropout = nn.Dropout(dropout) | ||||
@@ -273,7 +273,7 @@ class MaskedRNNBase(nn.Module): | |||||
hx = (hx, hx) | hx = (hx, hx) | ||||
func = AutogradMaskedStep(num_layers=self.num_layers, | func = AutogradMaskedStep(num_layers=self.num_layers, | ||||
dropout=self.dropout, | |||||
dropout=self.step_dropout, | |||||
train=self.training, | train=self.training, | ||||
lstm=lstm) | lstm=lstm) | ||||
@@ -18,7 +18,6 @@ MLP_HIDDEN = 2000 | |||||
CLASSES_NUM = 5 | CLASSES_NUM = 5 | ||||
from fastNLP.models.base_model import BaseModel | from fastNLP.models.base_model import BaseModel | ||||
from fastNLP.core.trainer import BaseTrainer | |||||
class MyNet(BaseModel): | class MyNet(BaseModel): | ||||
@@ -60,18 +59,6 @@ class Net(nn.Module): | |||||
return x, penalty | return x, penalty | ||||
class MyTrainer(BaseTrainer): | |||||
def __init__(self, args): | |||||
super(MyTrainer, self).__init__(args) | |||||
self.optimizer = None | |||||
def define_optimizer(self): | |||||
self.optimizer = optim.SGD(self.model.parameters(), lr=0.01, momentum=0.9) | |||||
def define_loss(self): | |||||
self.loss_func = nn.CrossEntropyLoss() | |||||
def train(model_dict=None, using_cuda=True, learning_rate=0.06,\ | def train(model_dict=None, using_cuda=True, learning_rate=0.06,\ | ||||
momentum=0.3, batch_size=32, epochs=5, coef=1.0, interval=10): | momentum=0.3, batch_size=32, epochs=5, coef=1.0, interval=10): | ||||
""" | """ | ||||
@@ -1,26 +1,26 @@ | |||||
import sys, os | |||||
import os | |||||
import sys | |||||
sys.path.append(os.path.join(os.path.dirname(__file__), '../..')) | sys.path.append(os.path.join(os.path.dirname(__file__), '../..')) | ||||
from fastNLP.loader.config_loader import ConfigLoader, ConfigSection | from fastNLP.loader.config_loader import ConfigLoader, ConfigSection | ||||
from fastNLP.core.trainer import SeqLabelTrainer | from fastNLP.core.trainer import SeqLabelTrainer | ||||
from fastNLP.loader.dataset_loader import TokenizeDatasetLoader, BaseLoader | from fastNLP.loader.dataset_loader import TokenizeDatasetLoader, BaseLoader | ||||
from fastNLP.loader.preprocess import POSPreprocess, load_pickle | |||||
from fastNLP.core.preprocess import SeqLabelPreprocess, load_pickle | |||||
from fastNLP.saver.model_saver import ModelSaver | from fastNLP.saver.model_saver import ModelSaver | ||||
from fastNLP.loader.model_loader import ModelLoader | from fastNLP.loader.model_loader import ModelLoader | ||||
from fastNLP.core.tester import SeqLabelTester | from fastNLP.core.tester import SeqLabelTester | ||||
from fastNLP.models.sequence_modeling import AdvSeqLabel | from fastNLP.models.sequence_modeling import AdvSeqLabel | ||||
from fastNLP.core.inference import SeqLabelInfer | |||||
from fastNLP.core.optimizer import SGD | |||||
from fastNLP.core.predictor import SeqLabelInfer | |||||
# not in the file's dir | # not in the file's dir | ||||
if len(os.path.dirname(__file__)) != 0: | if len(os.path.dirname(__file__)) != 0: | ||||
os.chdir(os.path.dirname(__file__)) | os.chdir(os.path.dirname(__file__)) | ||||
datadir = 'icwb2-data' | |||||
cfgfile = 'cws.cfg' | |||||
datadir = "/home/zyfeng/data/" | |||||
cfgfile = './cws.cfg' | |||||
data_name = "pku_training.utf8" | data_name = "pku_training.utf8" | ||||
cws_data_path = os.path.join(datadir, "training/pku_training.utf8") | |||||
cws_data_path = os.path.join(datadir, "pku_training.utf8") | |||||
pickle_path = "save" | pickle_path = "save" | ||||
data_infer_path = os.path.join(datadir, "infer.utf8") | data_infer_path = os.path.join(datadir, "infer.utf8") | ||||
@@ -70,12 +70,13 @@ def train(): | |||||
train_data = loader.load_pku() | train_data = loader.load_pku() | ||||
# Preprocessor | # Preprocessor | ||||
p = POSPreprocess(train_data, pickle_path, train_dev_split=0.3) | |||||
train_args["vocab_size"] = p.vocab_size | |||||
train_args["num_classes"] = p.num_classes | |||||
preprocessor = SeqLabelPreprocess() | |||||
data_train, data_dev = preprocessor.run(train_data, pickle_path=pickle_path, train_dev_split=0.3) | |||||
train_args["vocab_size"] = preprocessor.vocab_size | |||||
train_args["num_classes"] = preprocessor.num_classes | |||||
# Trainer | # Trainer | ||||
trainer = SeqLabelTrainer(train_args) | |||||
trainer = SeqLabelTrainer(**train_args.data) | |||||
# Model | # Model | ||||
model = AdvSeqLabel(train_args) | model = AdvSeqLabel(train_args) | ||||
@@ -83,10 +84,11 @@ def train(): | |||||
ModelLoader.load_pytorch(model, "./save/saved_model.pkl") | ModelLoader.load_pytorch(model, "./save/saved_model.pkl") | ||||
print('model parameter loaded!') | print('model parameter loaded!') | ||||
except Exception as e: | except Exception as e: | ||||
print("No saved model. Continue.") | |||||
pass | pass | ||||
# Start training | # Start training | ||||
trainer.train(model) | |||||
trainer.train(model, data_train, data_dev) | |||||
print("Training finished!") | print("Training finished!") | ||||
# Saver | # Saver | ||||
@@ -106,6 +108,9 @@ def test(): | |||||
index2label = load_pickle(pickle_path, "id2class.pkl") | index2label = load_pickle(pickle_path, "id2class.pkl") | ||||
test_args["num_classes"] = len(index2label) | test_args["num_classes"] = len(index2label) | ||||
# load dev data | |||||
dev_data = load_pickle(pickle_path, "data_dev.pkl") | |||||
# Define the same model | # Define the same model | ||||
model = AdvSeqLabel(test_args) | model = AdvSeqLabel(test_args) | ||||
@@ -114,10 +119,10 @@ def test(): | |||||
print("model loaded!") | print("model loaded!") | ||||
# Tester | # Tester | ||||
tester = SeqLabelTester(test_args) | |||||
tester = SeqLabelTester(**test_args.data) | |||||
# Start testing | # Start testing | ||||
tester.test(model) | |||||
tester.test(model, dev_data) | |||||
# print test results | # print test results | ||||
print(tester.show_matrices()) | print(tester.show_matrices()) | ||||
@@ -0,0 +1,18 @@ | |||||
import os | |||||
import unittest | |||||
from fastNLP.core.action import Action, Batchifier, SequentialSampler | |||||
class TestAction(unittest.TestCase): | |||||
def test_case_1(self): | |||||
x = [1, 2, 3, 4, 5, 6, 7, 8] | |||||
y = [1, 1, 1, 1, 2, 2, 2, 2] | |||||
data = [] | |||||
for i in range(len(x)): | |||||
data.append([[x[i]], [y[i]]]) | |||||
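# wrap the eight ([x], [y]) samples into sequential batches of two; drop_last=False keeps any trailing partial batch | |||||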
data = Batchifier(SequentialSampler(data), batch_size=2, drop_last=False) | |||||
action = Action() | |||||
for batch_x in action.make_batch(data, use_cuda=False, output_length=True, max_len=None): | |||||
print(batch_x) | |||||
@@ -0,0 +1,43 @@ | |||||
import os | |||||
import unittest | |||||
from fastNLP.core.preprocess import SeqLabelPreprocess | |||||
class TestSeqLabelPreprocess(unittest.TestCase): | |||||
def test_case_1(self): | |||||
data = [ | |||||
[['Tom', 'and', 'Jerry', '.'], ['n', '&', 'n', '.']], | |||||
[['Hello', 'world', '!'], ['a', 'n', '.']], | |||||
[['Tom', 'and', 'Jerry', '.'], ['n', '&', 'n', '.']], | |||||
[['Hello', 'world', '!'], ['a', 'n', '.']], | |||||
[['Tom', 'and', 'Jerry', '.'], ['n', '&', 'n', '.']], | |||||
[['Hello', 'world', '!'], ['a', 'n', '.']], | |||||
[['Tom', 'and', 'Jerry', '.'], ['n', '&', 'n', '.']], | |||||
[['Hello', 'world', '!'], ['a', 'n', '.']], | |||||
[['Tom', 'and', 'Jerry', '.'], ['n', '&', 'n', '.']], | |||||
[['Hello', 'world', '!'], ['a', 'n', '.']], | |||||
] | |||||
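# ten (words, tags) samples; the block below clears ./save so run() rebuilds the vocab/label pickles and the train/dev split from scratch (run() is called twice, presumably to also cover the case where pickles already exist) | |||||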
if os.path.exists("./save"): | |||||
for root, dirs, files in os.walk("./save", topdown=False): | |||||
for name in files: | |||||
os.remove(os.path.join(root, name)) | |||||
for name in dirs: | |||||
os.rmdir(os.path.join(root, name)) | |||||
result = SeqLabelPreprocess().run(train_dev_data=data, train_dev_split=0.4, | |||||
pickle_path="./save") | |||||
result = SeqLabelPreprocess().run(train_dev_data=data, train_dev_split=0.4, | |||||
pickle_path="./save") | |||||
if os.path.exists("./save"): | |||||
for root, dirs, files in os.walk("./save", topdown=False): | |||||
for name in files: | |||||
os.remove(os.path.join(root, name)) | |||||
for name in dirs: | |||||
os.rmdir(os.path.join(root, name)) | |||||
result = SeqLabelPreprocess().run(test_data=data, train_dev_data=data, | |||||
pickle_path="./save", train_dev_split=0.4, | |||||
cross_val=True) | |||||
result = SeqLabelPreprocess().run(test_data=data, train_dev_data=data, | |||||
pickle_path="./save", train_dev_split=0.4, | |||||
cross_val=True) |
@@ -0,0 +1,33 @@ | |||||
import os | |||||
import torch.nn as nn | |||||
import unittest | |||||
from fastNLP.core.trainer import SeqLabelTrainer | |||||
from fastNLP.core.loss import Loss | |||||
from fastNLP.core.optimizer import Optimizer | |||||
from fastNLP.models.sequence_modeling import SeqLabeling | |||||
class TestTrainer(unittest.TestCase): | |||||
def test_case_1(self): | |||||
args = {"epochs": 3, "batch_size": 8, "validate": True, "use_cuda": True, "pickle_path": "./save/", | |||||
"save_best_dev": True, "model_name": "default_model_name.pkl", | |||||
"loss": Loss(None), | |||||
"optimizer": Optimizer("Adam", lr=0.001, weight_decay=0), | |||||
"vocab_size": 20, | |||||
"word_emb_dim": 100, | |||||
"rnn_hidden_units": 100, | |||||
"num_classes": 3 | |||||
} | |||||
trainer = SeqLabelTrainer() | |||||
train_data = [ | |||||
[[1, 2, 3, 4, 5, 6], [1, 0, 1, 0, 1, 2]], | |||||
[[2, 3, 4, 5, 1, 6], [0, 1, 0, 1, 0, 2]], | |||||
[[1, 4, 1, 4, 1, 6], [1, 0, 1, 0, 1, 2]], | |||||
[[1, 2, 3, 4, 5, 6], [1, 0, 1, 0, 1, 2]], | |||||
[[2, 3, 4, 5, 1, 6], [0, 1, 0, 1, 0, 2]], | |||||
[[1, 4, 1, 4, 1, 6], [1, 0, 1, 0, 1, 2]], | |||||
] | |||||
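# each sample is [word_id_sequence, label_id_sequence]; the dev set below simply reuses the training data for this smoke test | |||||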
dev_data = train_data | |||||
model = SeqLabeling(args) | |||||
trainer.train(network=model, train_data=train_data, dev_data=dev_data) |
@@ -1,65 +1,11 @@ | |||||
[General] | |||||
revision = "first" | |||||
datapath = "./data/smallset/imdb/" | |||||
embed_path = "./data/smallset/imdb/embedding.txt" | |||||
optimizer = "adam" | |||||
attn_mode = "rout" | |||||
seq_encoder = "bilstm" | |||||
out_caps_num = 5 | |||||
rout_iter = 3 | |||||
max_snt_num = 40 | |||||
max_wd_num = 40 | |||||
max_epochs = 50 | |||||
pre_trained = true | |||||
batch_sz = 32 | |||||
batch_sz_min = 32 | |||||
bucket_sz = 5000 | |||||
partial_update_until_epoch = 2 | |||||
embed_size = 300 | |||||
hidden_size = 200 | |||||
dense_hidden = [300, 10] | |||||
lr = 0.0002 | |||||
decay_steps = 1000 | |||||
decay_rate = 0.9 | |||||
dropout = 0.2 | |||||
early_stopping = 7 | |||||
reg = 1e-06 | |||||
[My] | |||||
datapath = "./data/smallset/imdb/" | |||||
embed_path = "./data/smallset/imdb/embedding.txt" | |||||
optimizer = "adam" | |||||
attn_mode = "rout" | |||||
seq_encoder = "bilstm" | |||||
out_caps_num = 5 | |||||
rout_iter = 3 | |||||
max_snt_num = 40 | |||||
max_wd_num = 40 | |||||
max_epochs = 50 | |||||
pre_trained = true | |||||
batch_sz = 32 | |||||
batch_sz_min = 32 | |||||
bucket_sz = 5000 | |||||
partial_update_until_epoch = 2 | |||||
embed_size = 300 | |||||
hidden_size = 200 | |||||
dense_hidden = [300, 10] | |||||
lr = 0.0002 | |||||
decay_steps = 1000 | |||||
decay_rate = 0.9 | |||||
dropout = 0.2 | |||||
early_stopping = 70 | |||||
reg = 1e-05 | |||||
test = 5 | |||||
new_attr = 40 | |||||
[POS] | |||||
[test_seq_label_trainer] | |||||
epochs = 1 | epochs = 1 | ||||
batch_size = 32 | batch_size = 32 | ||||
pickle_path = "./data_for_tests/" | |||||
validate = true | validate = true | ||||
save_best_dev = true | save_best_dev = true | ||||
model_saved_path = "./" | |||||
use_cuda = true | |||||
[test_seq_label_model] | |||||
rnn_hidden_units = 100 | rnn_hidden_units = 100 | ||||
rnn_layers = 1 | rnn_layers = 1 | ||||
rnn_bi_direction = true | rnn_bi_direction = true | ||||
@@ -68,13 +14,12 @@ dropout = 0.5 | |||||
use_crf = true | use_crf = true | ||||
use_cuda = true | use_cuda = true | ||||
[POS_test] | |||||
[test_seq_label_tester] | |||||
save_output = true | save_output = true | ||||
validate_in_training = true | validate_in_training = true | ||||
save_dev_input = false | save_dev_input = false | ||||
save_loss = true | save_loss = true | ||||
batch_size = 1 | batch_size = 1 | ||||
pickle_path = "./data_for_tests/" | |||||
rnn_hidden_units = 100 | rnn_hidden_units = 100 | ||||
rnn_layers = 1 | rnn_layers = 1 | ||||
rnn_bi_direction = true | rnn_bi_direction = true | ||||
@@ -84,7 +29,6 @@ use_crf = true | |||||
use_cuda = true | use_cuda = true | ||||
[POS_infer] | [POS_infer] | ||||
pickle_path = "./data_for_tests/" | |||||
rnn_hidden_units = 100 | rnn_hidden_units = 100 | ||||
rnn_layers = 1 | rnn_layers = 1 | ||||
rnn_bi_direction = true | rnn_bi_direction = true | ||||
@@ -95,14 +39,9 @@ num_classes = 27 | |||||
[text_class] | [text_class] | ||||
epochs = 1 | epochs = 1 | ||||
batch_size = 10 | batch_size = 10 | ||||
pickle_path = "./save_path/" | |||||
validate = false | validate = false | ||||
save_best_dev = false | save_best_dev = false | ||||
model_saved_path = "./save_path/" | |||||
use_cuda = true | use_cuda = true | ||||
learn_rate = 1e-3 | learn_rate = 1e-3 | ||||
momentum = 0.9 | momentum = 0.9 | ||||
[text_class_model] | |||||
vocab_size = 867 | |||||
num_classes = 18 | |||||
model_name = "class_model.pkl" |
@@ -0,0 +1,7 @@ | |||||
[test] | |||||
x = 1 | |||||
y = 2 | |||||
z = 3 | |||||
input = [1,2,3] | |||||
text = "this is text" | |||||
doubles = 0.5 |
@@ -0,0 +1,75 @@ | |||||
import os | |||||
import configparser | |||||
import json | |||||
import unittest | |||||
from fastNLP.loader.config_loader import ConfigSection, ConfigLoader | |||||
from fastNLP.loader.dataset_loader import TokenizeDatasetLoader, POSDatasetLoader, LMDatasetLoader | |||||
class TestConfigLoader(unittest.TestCase): | |||||
def test_case_ConfigLoader(self): | |||||
def read_section_from_config(config_path, section_name): | |||||
dict = {} | |||||
if not os.path.exists(config_path): | |||||
raise FileNotFoundError("config file {} NOT found.".format(config_path)) | |||||
cfg = configparser.ConfigParser() | |||||
cfg.read(config_path) | |||||
if section_name not in cfg: | |||||
raise AttributeError("config file {} do NOT have section {}".format( | |||||
config_path, section_name | |||||
)) | |||||
gen_sec = cfg[section_name] | |||||
for s in gen_sec.keys(): | |||||
try: | |||||
val = json.loads(gen_sec[s]) | |||||
dict[s] = val | |||||
except Exception as e: | |||||
raise AttributeError("json can NOT load {} in section {}, config file {}".format( | |||||
s, section_name, config_path | |||||
)) | |||||
return dict | |||||
test_arg = ConfigSection() | |||||
ConfigLoader("config", "").load_config(os.path.join("./test/loader", "config"), {"test": test_arg}) | |||||
#ConfigLoader("config", "").load_config("/home/ygxu/github/fastNLP_testing/fastNLP/test/loader/config", | |||||
# {"test": test_arg}) | |||||
#dict = read_section_from_config("/home/ygxu/github/fastNLP_testing/fastNLP/test/loader/config", "test") | |||||
dict = read_section_from_config(os.path.join("./test/loader", "config"), "test") | |||||
for sec in dict: | |||||
if (sec not in test_arg) or (dict[sec] != test_arg[sec]): | |||||
raise AttributeError("ERROR") | |||||
for sec in test_arg.__dict__.keys(): | |||||
if (sec not in dict) or (dict[sec] != test_arg[sec]): | |||||
raise AttributeError("ERROR") | |||||
try: | |||||
not_exist = test_arg["NOT EXIST"] | |||||
except Exception as e: | |||||
pass | |||||
print("pass config test!") | |||||
class TestDatasetLoader(unittest.TestCase): | |||||
def test_case_TokenizeDatasetLoader(self): | |||||
loader = TokenizeDatasetLoader("cws_pku_utf_8", "./test/data_for_tests/cws_pku_utf_8") | |||||
data = loader.load_pku(max_seq_len=32) | |||||
print("pass TokenizeDatasetLoader test!") | |||||
def test_case_POSDatasetLoader(self): | |||||
loader = POSDatasetLoader("people", "./test/data_for_tests/people.txt") | |||||
data = loader.load() | |||||
datas = loader.load_lines() | |||||
print("pass POSDatasetLoader test!") | |||||
def test_case_LMDatasetLoader(self): | |||||
loader = LMDatasetLoader("cws_pku_utf_8", "./test/data_for_tests/cws_pku_utf_8") | |||||
data = loader.load() | |||||
datas = loader.load_lines() | |||||
print("pass TokenizeDatasetLoader test!") |
@@ -0,0 +1,27 @@ | |||||
import torch | |||||
import unittest | |||||
from fastNLP.modules.encoder.masked_rnn import MaskedRNN | |||||
class TestMaskedRnn(unittest.TestCase): | |||||
def test_case_1(self): | |||||
masked_rnn = MaskedRNN(input_size=1, hidden_size=1, bidirectional=True, batch_first=True) | |||||
x = torch.tensor([[[1.0], [2.0]]]) | |||||
print(x.size()) | |||||
y = masked_rnn(x) | |||||
mask = torch.tensor([[[1], [1]]]) | |||||
y = masked_rnn(x, mask=mask) | |||||
mask = torch.tensor([[[1], [0]]]) | |||||
y = masked_rnn(x, mask=mask) | |||||
def test_case_2(self): | |||||
masked_rnn = MaskedRNN(input_size=1, hidden_size=1, bidirectional=False, batch_first=True) | |||||
x = torch.tensor([[[1.0], [2.0]]]) | |||||
print(x.size()) | |||||
y = masked_rnn(x) | |||||
mask = torch.tensor([[[1], [1]]]) | |||||
y = masked_rnn(x, mask=mask) | |||||
xx = torch.tensor([[[1.0]]]) | |||||
y = masked_rnn.step(xx) | |||||
y = masked_rnn.step(xx, mask=mask) |
@@ -0,0 +1,30 @@ | |||||
import torch | |||||
import unittest | |||||
from fastNLP.modules.other_modules import GroupNorm, LayerNormalization, BiLinear | |||||
class TestGroupNorm(unittest.TestCase): | |||||
def test_case_1(self): | |||||
gn = GroupNorm(num_features=1, num_groups=10, eps=1.5e-5) | |||||
x = torch.randn((20, 50, 10)) | |||||
y = gn(x) | |||||
class TestLayerNormalization(unittest.TestCase): | |||||
def test_case_1(self): | |||||
ln = LayerNormalization(d_hid=5, eps=2e-3) | |||||
x = torch.randn((20, 50, 5)) | |||||
y = ln(x) | |||||
class TestBiLinear(unittest.TestCase): | |||||
def test_case_1(self): | |||||
bl = BiLinear(n_left=5, n_right=5, n_out=10, bias=True) | |||||
x_left = torch.randn((7, 10, 20, 5)) | |||||
x_right = torch.randn((7, 10, 20, 5)) | |||||
y = bl(x_left, x_right) | |||||
print(bl) | |||||
bl2 = BiLinear(n_left=15, n_right=15, n_out=10, bias=True) |
@@ -0,0 +1,18 @@ | |||||
import torch | |||||
import numpy as np | |||||
import unittest | |||||
import fastNLP.modules.utils as utils | |||||
class TestUtils(unittest.TestCase): | |||||
def test_case_1(self): | |||||
a = torch.tensor([ | |||||
[1, 2, 3, 4, 5], [2, 3, 4, 5, 6] | |||||
]) | |||||
utils.orthogonal(a) | |||||
def test_case_2(self): | |||||
a = np.random.rand(100, 100) | |||||
utils.mst(a) | |||||
@@ -0,0 +1,28 @@ | |||||
import torch | |||||
import unittest | |||||
from fastNLP.modules.encoder.variational_rnn import VarMaskedFastLSTM | |||||
class TestMaskedRnn(unittest.TestCase): | |||||
def test_case_1(self): | |||||
masked_rnn = VarMaskedFastLSTM(input_size=1, hidden_size=1, bidirectional=True, batch_first=True) | |||||
x = torch.tensor([[[1.0], [2.0]]]) | |||||
print(x.size()) | |||||
y = masked_rnn(x) | |||||
mask = torch.tensor([[[1], [1]]]) | |||||
y = masked_rnn(x, mask=mask) | |||||
mask = torch.tensor([[[1], [0]]]) | |||||
y = masked_rnn(x, mask=mask) | |||||
def test_case_2(self): | |||||
masked_rnn = VarMaskedFastLSTM(input_size=1, hidden_size=1, bidirectional=False, batch_first=True) | |||||
x = torch.tensor([[[1.0], [2.0]]]) | |||||
print(x.size()) | |||||
y = masked_rnn(x) | |||||
mask = torch.tensor([[[1], [1]]]) | |||||
y = masked_rnn(x, mask=mask) | |||||
xx = torch.tensor([[[1.0]]]) | |||||
#y, hidden = masked_rnn.step(xx) | |||||
#step() still has a bug | |||||
#y, hidden = masked_rnn.step(xx, mask=mask) |
@@ -20,7 +20,7 @@ class MyNERTrainer(SeqLabelTrainer): | |||||
override | override | ||||
:return: | :return: | ||||
""" | """ | ||||
self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.001) | |||||
self.optimizer = torch.optim.Adam(self._model.parameters(), lr=0.001) | |||||
self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=3000, gamma=0.5) | self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=3000, gamma=0.5) | ||||
def update(self): | def update(self): | ||||
@@ -13,6 +13,7 @@ from fastNLP.loader.dataset_loader import ClassDatasetLoader | |||||
from fastNLP.models.base_model import BaseModel | from fastNLP.models.base_model import BaseModel | ||||
from fastNLP.modules import aggregation | from fastNLP.modules import aggregation | ||||
from fastNLP.modules import encoder | from fastNLP.modules import encoder | ||||
from fastNLP.modules import decoder | |||||
class ClassificationModel(BaseModel): | class ClassificationModel(BaseModel): | ||||
@@ -20,20 +21,20 @@ class ClassificationModel(BaseModel): | |||||
Simple text classification model based on CNN. | Simple text classification model based on CNN. | ||||
""" | """ | ||||
def __init__(self, class_num, vocab_size): | |||||
def __init__(self, num_classes, vocab_size): | |||||
super(ClassificationModel, self).__init__() | super(ClassificationModel, self).__init__() | ||||
self.embed = encoder.Embedding(nums=vocab_size, dims=300) | |||||
self.conv = encoder.Conv( | |||||
self.emb = encoder.Embedding(nums=vocab_size, dims=300) | |||||
self.enc = encoder.Conv( | |||||
in_channels=300, out_channels=100, kernel_size=3) | in_channels=300, out_channels=100, kernel_size=3) | ||||
self.pool = aggregation.MaxPool() | |||||
self.output = encoder.Linear(input_size=100, output_size=class_num) | |||||
self.agg = aggregation.MaxPool() | |||||
self.dec = decoder.MLP(100, num_classes=num_classes) | |||||
def forward(self, x): | def forward(self, x): | ||||
x = self.embed(x) # [N,L] -> [N,L,C] | |||||
x = self.conv(x) # [N,L,C_in] -> [N,L,C_out] | |||||
x = self.pool(x) # [N,L,C] -> [N,C] | |||||
x = self.output(x) # [N,C] -> [N, N_class] | |||||
x = self.emb(x) # [N,L] -> [N,L,C] | |||||
x = self.enc(x) # [N,L,C_in] -> [N,L,C_out] | |||||
x = self.agg(x) # [N,L,C] -> [N,C] | |||||
x = self.dec(x) # [N,C] -> [N, N_class] | |||||
return x | return x | ||||
@@ -55,7 +56,7 @@ model_args = { | |||||
'num_classes': n_classes, | 'num_classes': n_classes, | ||||
'vocab_size': vocab_size | 'vocab_size': vocab_size | ||||
} | } | ||||
model = ClassificationModel(class_num=n_classes, vocab_size=vocab_size) | |||||
model = ClassificationModel(num_classes=n_classes, vocab_size=vocab_size) | |||||
# train model | # train model | ||||
train_args = { | train_args = { | ||||
@@ -75,4 +76,4 @@ trainer.cross_validate(model) | |||||
# predict using model | # predict using model | ||||
data_infer = [x[0] for x in data] | data_infer = [x[0] for x in data] | ||||
infer = ClassificationInfer(data_dir) | infer = ClassificationInfer(data_dir) | ||||
labels_pred = infer.predict(model, data_infer) | |||||
labels_pred = infer.predict(model, data_infer) |
@@ -1,7 +1,7 @@ | |||||
import os | |||||
import sys | import sys | ||||
sys.path.append("..") | sys.path.append("..") | ||||
import argparse | |||||
from fastNLP.loader.config_loader import ConfigLoader, ConfigSection | from fastNLP.loader.config_loader import ConfigLoader, ConfigSection | ||||
from fastNLP.core.trainer import SeqLabelTrainer | from fastNLP.core.trainer import SeqLabelTrainer | ||||
from fastNLP.loader.dataset_loader import POSDatasetLoader, BaseLoader | from fastNLP.loader.dataset_loader import POSDatasetLoader, BaseLoader | ||||
@@ -11,17 +11,29 @@ from fastNLP.loader.model_loader import ModelLoader | |||||
from fastNLP.core.tester import SeqLabelTester | from fastNLP.core.tester import SeqLabelTester | ||||
from fastNLP.models.sequence_modeling import SeqLabeling | from fastNLP.models.sequence_modeling import SeqLabeling | ||||
from fastNLP.core.predictor import SeqLabelInfer | from fastNLP.core.predictor import SeqLabelInfer | ||||
from fastNLP.core.optimizer import Optimizer | |||||
parser = argparse.ArgumentParser() | |||||
parser.add_argument("-s", "--save", type=str, default="./seq_label/", help="path to save pickle files") | |||||
parser.add_argument("-t", "--train", type=str, default="./data_for_tests/people.txt", | |||||
help="path to the training data") | |||||
parser.add_argument("-c", "--config", type=str, default="./data_for_tests/config", help="path to the config file") | |||||
parser.add_argument("-m", "--model_name", type=str, default="seq_label_model.pkl", help="the name of the model") | |||||
parser.add_argument("-i", "--infer", type=str, default="data_for_tests/people_infer.txt", | |||||
help="data used for inference") | |||||
data_name = "people.txt" | |||||
data_path = "data_for_tests/people.txt" | |||||
pickle_path = "seq_label/" | |||||
data_infer_path = "data_for_tests/people_infer.txt" | |||||
args = parser.parse_args() | |||||
pickle_path = args.save | |||||
model_name = args.model_name | |||||
config_dir = args.config | |||||
data_path = args.train | |||||
data_infer_path = args.infer | |||||
def infer(): | def infer(): | ||||
# Load infer configuration, the same as test | # Load infer configuration, the same as test | ||||
test_args = ConfigSection() | test_args = ConfigSection() | ||||
ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS_test": test_args}) | |||||
ConfigLoader("config.cfg", "").load_config(config_dir, {"POS_infer": test_args}) | |||||
# fetch dictionary size and number of labels from pickle files | # fetch dictionary size and number of labels from pickle files | ||||
word2index = load_pickle(pickle_path, "word2id.pkl") | word2index = load_pickle(pickle_path, "word2id.pkl") | ||||
@@ -33,11 +45,11 @@ def infer(): | |||||
model = SeqLabeling(test_args) | model = SeqLabeling(test_args) | ||||
# Dump trained parameters into the model | # Dump trained parameters into the model | ||||
ModelLoader.load_pytorch(model, pickle_path + "saved_model.pkl") | |||||
ModelLoader.load_pytorch(model, os.path.join(pickle_path, model_name)) | |||||
print("model loaded!") | print("model loaded!") | ||||
# Data Loader | # Data Loader | ||||
raw_data_loader = BaseLoader(data_name, data_infer_path) | |||||
raw_data_loader = BaseLoader("xxx", data_infer_path) | |||||
infer_data = raw_data_loader.load_lines() | infer_data = raw_data_loader.load_lines() | ||||
# Inference interface | # Inference interface | ||||
@@ -51,49 +63,72 @@ def infer(): | |||||
def train_and_test(): | def train_and_test(): | ||||
# Config Loader | # Config Loader | ||||
train_args = ConfigSection() | |||||
ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS": train_args}) | |||||
trainer_args = ConfigSection() | |||||
model_args = ConfigSection() | |||||
ConfigLoader("config.cfg", "").load_config(config_dir, { | |||||
"test_seq_label_trainer": trainer_args, "test_seq_label_model": model_args}) | |||||
# Data Loader | # Data Loader | ||||
pos_loader = POSDatasetLoader(data_name, data_path) | |||||
pos_loader = POSDatasetLoader("xxx", data_path) | |||||
train_data = pos_loader.load_lines() | train_data = pos_loader.load_lines() | ||||
# Preprocessor | # Preprocessor | ||||
p = SeqLabelPreprocess() | p = SeqLabelPreprocess() | ||||
data_train, data_dev = p.run(train_data, pickle_path=pickle_path, train_dev_split=0.5) | data_train, data_dev = p.run(train_data, pickle_path=pickle_path, train_dev_split=0.5) | ||||
train_args["vocab_size"] = p.vocab_size | |||||
train_args["num_classes"] = p.num_classes | |||||
# Trainer | |||||
trainer = SeqLabelTrainer(train_args) | |||||
model_args["vocab_size"] = p.vocab_size | |||||
model_args["num_classes"] = p.num_classes | |||||
# Trainer: two definition styles | |||||
# 1 | |||||
# trainer = SeqLabelTrainer(trainer_args.data) | |||||
# 2 | |||||
trainer = SeqLabelTrainer( | |||||
epochs=trainer_args["epochs"], | |||||
batch_size=trainer_args["batch_size"], | |||||
validate=trainer_args["validate"], | |||||
use_cuda=trainer_args["use_cuda"], | |||||
pickle_path=pickle_path, | |||||
save_best_dev=trainer_args["save_best_dev"], | |||||
model_name=model_name, | |||||
optimizer=Optimizer("SGD", lr=0.01, momentum=0.9), | |||||
) | |||||
# Model | # Model | ||||
model = SeqLabeling(train_args) | |||||
model = SeqLabeling(model_args) | |||||
# Start training | # Start training | ||||
trainer.train(model, data_train, data_dev) | trainer.train(model, data_train, data_dev) | ||||
print("Training finished!") | print("Training finished!") | ||||
# Saver | # Saver | ||||
saver = ModelSaver(pickle_path + "saved_model.pkl") | |||||
saver = ModelSaver(os.path.join(pickle_path, model_name)) | |||||
saver.save_pytorch(model) | saver.save_pytorch(model) | ||||
print("Model saved!") | print("Model saved!") | ||||
del model, trainer, pos_loader | del model, trainer, pos_loader | ||||
# Define the same model | # Define the same model | ||||
model = SeqLabeling(train_args) | |||||
model = SeqLabeling(model_args) | |||||
# Dump trained parameters into the model | # Dump trained parameters into the model | ||||
ModelLoader.load_pytorch(model, pickle_path + "saved_model.pkl") | |||||
ModelLoader.load_pytorch(model, os.path.join(pickle_path, model_name)) | |||||
print("model loaded!") | print("model loaded!") | ||||
# Load test configuration | # Load test configuration | ||||
test_args = ConfigSection() | |||||
ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS_test": test_args}) | |||||
tester_args = ConfigSection() | |||||
ConfigLoader("config.cfg", "").load_config(config_dir, {"test_seq_label_tester": tester_args}) | |||||
# Tester | # Tester | ||||
tester = SeqLabelTester(test_args) | |||||
tester = SeqLabelTester(save_output=False, | |||||
save_loss=False, | |||||
save_best_dev=False, | |||||
batch_size=4, | |||||
use_cuda=False, | |||||
pickle_path=pickle_path, | |||||
model_name="seq_label_in_test.pkl", | |||||
print_every_step=1 | |||||
) | |||||
# Start testing with validation data | # Start testing with validation data | ||||
tester.test(model, data_dev) | tester.test(model, data_dev) | ||||
@@ -1,13 +1,24 @@ | |||||
import sys | |||||
sys.path.append("..") | |||||
from fastNLP.fastnlp import FastNLP | from fastNLP.fastnlp import FastNLP | ||||
from fastNLP.fastnlp import interpret_word_seg_results | |||||
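# pickle files produced by the CWS training script; this absolute path is machine-specific and needs to be adjusted locally | |||||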
PATH_TO_CWS_PICKLE_FILES = "/home/zyfeng/fastNLP/reproduction/chinese_word_segment/save/" | |||||
def word_seg(): | def word_seg(): | ||||
nlp = FastNLP("./data_for_tests/") | |||||
nlp.load("seq_label_model") | |||||
text = "这是最好的基于深度学习的中文分词系统。" | |||||
result = nlp.run(text) | |||||
print(result) | |||||
print("FastNLP finished!") | |||||
nlp = FastNLP(model_dir=PATH_TO_CWS_PICKLE_FILES) | |||||
nlp.load("cws_basic_model", config_file="cws.cfg", section_name="POS_test") | |||||
text = ["这是最好的基于深度学习的中文分词系统。", | |||||
"大王叫我来巡山。", | |||||
"我党多年来致力于改善人民生活水平。"] | |||||
results = nlp.run(text) | |||||
print(results) | |||||
for example in results: | |||||
words, labels = [], [] | |||||
for res in example: | |||||
words.append(res[0]) | |||||
labels.append(res[1]) | |||||
print(interpret_word_seg_results(words, labels)) | |||||
def text_class(): | def text_class(): | ||||
@@ -19,5 +30,14 @@ def text_class(): | |||||
print("FastNLP finished!") | print("FastNLP finished!") | ||||
def test_word_seg_interpret(): | |||||
foo = [[('这', 'S'), ('是', 'S'), ('最', 'S'), ('好', 'S'), ('的', 'S'), ('基', 'B'), ('于', 'E'), ('深', 'B'), ('度', 'E'), | |||||
('学', 'B'), ('习', 'E'), ('的', 'S'), ('中', 'B'), ('文', 'E'), ('分', 'B'), ('词', 'E'), ('系', 'B'), ('统', 'E'), | |||||
('。', 'S')]] | |||||
chars = [x[0] for x in foo[0]] | |||||
labels = [x[1] for x in foo[0]] | |||||
print(interpret_word_seg_results(chars, labels)) | |||||
if __name__ == "__main__": | if __name__ == "__main__": | ||||
text_class() | |||||
word_seg() |
@@ -1,6 +1,7 @@ | |||||
# Python: 3.5 | # Python: 3.5 | ||||
# encoding: utf-8 | # encoding: utf-8 | ||||
import argparse | |||||
import os | import os | ||||
import sys | import sys | ||||
@@ -13,75 +14,105 @@ from fastNLP.loader.model_loader import ModelLoader | |||||
from fastNLP.core.preprocess import ClassPreprocess | from fastNLP.core.preprocess import ClassPreprocess | ||||
from fastNLP.models.cnn_text_classification import CNNText | from fastNLP.models.cnn_text_classification import CNNText | ||||
from fastNLP.saver.model_saver import ModelSaver | from fastNLP.saver.model_saver import ModelSaver | ||||
from fastNLP.core.optimizer import Optimizer | |||||
from fastNLP.core.loss import Loss | |||||
save_path = "./test_classification/" | |||||
data_dir = "./data_for_tests/" | |||||
train_file = 'text_classify.txt' | |||||
model_name = "model_class.pkl" | |||||
parser = argparse.ArgumentParser() | |||||
parser.add_argument("-s", "--save", type=str, default="./test_classification/", help="path to save pickle files") | |||||
parser.add_argument("-t", "--train", type=str, default="./data_for_tests/text_classify.txt", | |||||
help="path to the training data") | |||||
parser.add_argument("-c", "--config", type=str, default="./data_for_tests/config", help="path to the config file") | |||||
parser.add_argument("-m", "--model_name", type=str, default="classify_model.pkl", help="the name of the model") | |||||
args = parser.parse_args() | |||||
save_dir = args.save | |||||
train_data_dir = args.train | |||||
model_name = args.model_name | |||||
config_dir = args.config | |||||
def infer(): | def infer(): | ||||
# load dataset | # load dataset | ||||
print("Loading data...") | print("Loading data...") | ||||
ds_loader = ClassDatasetLoader("train", os.path.join(data_dir, train_file)) | |||||
ds_loader = ClassDatasetLoader("train", train_data_dir) | |||||
data = ds_loader.load() | data = ds_loader.load() | ||||
unlabeled_data = [x[0] for x in data] | unlabeled_data = [x[0] for x in data] | ||||
# pre-process data | # pre-process data | ||||
pre = ClassPreprocess() | pre = ClassPreprocess() | ||||
vocab_size, n_classes = pre.run(data, pickle_path=save_path) | |||||
print("vocabulary size:", vocab_size) | |||||
print("number of classes:", n_classes) | |||||
data = pre.run(data, pickle_path=save_dir) | |||||
print("vocabulary size:", pre.vocab_size) | |||||
print("number of classes:", pre.num_classes) | |||||
model_args = ConfigSection() | model_args = ConfigSection() | ||||
ConfigLoader.load_config("data_for_tests/config", {"text_class_model": model_args}) | |||||
# TODO: load from config file | |||||
model_args["vocab_size"] = pre.vocab_size | |||||
model_args["num_classes"] = pre.num_classes | |||||
# ConfigLoader.load_config(config_dir, {"text_class_model": model_args}) | |||||
# construct model | # construct model | ||||
print("Building model...") | print("Building model...") | ||||
cnn = CNNText(model_args) | cnn = CNNText(model_args) | ||||
# Dump trained parameters into the model | # Dump trained parameters into the model | ||||
ModelLoader.load_pytorch(cnn, "./data_for_tests/saved_model.pkl") | |||||
ModelLoader.load_pytorch(cnn, os.path.join(save_dir, model_name)) | |||||
print("model loaded!") | print("model loaded!") | ||||
infer = ClassificationInfer(data_dir) | |||||
infer = ClassificationInfer(pickle_path=save_dir) | |||||
results = infer.predict(cnn, unlabeled_data) | results = infer.predict(cnn, unlabeled_data) | ||||
print(results) | print(results) | ||||
def train(): | def train(): | ||||
train_args, model_args = ConfigSection(), ConfigSection() | train_args, model_args = ConfigSection(), ConfigSection() | ||||
ConfigLoader.load_config("data_for_tests/config", {"text_class": train_args, "text_class_model": model_args}) | |||||
ConfigLoader.load_config(config_dir, {"text_class": train_args}) | |||||
# load dataset | # load dataset | ||||
print("Loading data...") | print("Loading data...") | ||||
ds_loader = ClassDatasetLoader("train", os.path.join(data_dir, train_file)) | |||||
ds_loader = ClassDatasetLoader("train", train_data_dir) | |||||
data = ds_loader.load() | data = ds_loader.load() | ||||
print(data[0]) | print(data[0]) | ||||
# pre-process data | # pre-process data | ||||
pre = ClassPreprocess() | pre = ClassPreprocess() | ||||
data_train = pre.run(data, pickle_path=save_path) | |||||
data_train = pre.run(data, pickle_path=save_dir) | |||||
print("vocabulary size:", pre.vocab_size) | print("vocabulary size:", pre.vocab_size) | ||||
print("number of classes:", pre.num_classes) | print("number of classes:", pre.num_classes) | ||||
model_args["num_classes"] = pre.num_classes | |||||
model_args["vocab_size"] = pre.vocab_size | |||||
# construct model | # construct model | ||||
print("Building model...") | print("Building model...") | ||||
model = CNNText(model_args) | model = CNNText(model_args) | ||||
# ConfigSaver().save_config(config_dir, {"text_class_model": model_args}) | |||||
# train | # train | ||||
print("Training...") | print("Training...") | ||||
trainer = ClassificationTrainer(train_args) | |||||
# 1 | |||||
# trainer = ClassificationTrainer(train_args) | |||||
# 2 | |||||
trainer = ClassificationTrainer(epochs=train_args["epochs"], | |||||
batch_size=train_args["batch_size"], | |||||
validate=train_args["validate"], | |||||
use_cuda=train_args["use_cuda"], | |||||
pickle_path=save_dir, | |||||
save_best_dev=train_args["save_best_dev"], | |||||
model_name=model_name, | |||||
loss=Loss("cross_entropy"), | |||||
optimizer=Optimizer("SGD", lr=0.001, momentum=0.9)) | |||||
trainer.train(model, data_train) | trainer.train(model, data_train) | ||||
print("Training finished!") | print("Training finished!") | ||||
saver = ModelSaver("./data_for_tests/saved_model.pkl") | |||||
saver = ModelSaver(os.path.join(save_dir, model_name)) | |||||
saver.save_pytorch(model) | saver.save_pytorch(model) | ||||
print("Model saved!") | print("Model saved!") | ||||
if __name__ == "__main__": | if __name__ == "__main__": | ||||
train() | train() | ||||
# infer() | |||||
infer() |