@@ -1,87 +1,154 @@
 from collections import namedtuple
+import _pickle
 import numpy as np
+import torch
 from fastNLP.action.action import Action
+from fastNLP.action.action import RandomSampler, Batchifier
+from fastNLP.modules.utils import seq_mask
-class Tester(Action):
+class BaseTester(Action):
     """docstring for Tester"""
-    TestConfig = namedtuple("config", ["validate_in_training", "save_dev_input", "save_output",
-                                       "save_loss", "batch_size"])
     def __init__(self, test_args):
         """
         :param test_args: a dict of testing settings
         """
-        super(Tester, self).__init__()
-        self.validate_in_training = test_args.validate_in_training
-        self.save_dev_input = test_args.save_dev_input
-        self.valid_x = None
-        self.valid_y = None
-        self.save_output = test_args.save_output
+        super(BaseTester, self).__init__()
+        self.validate_in_training = test_args["validate_in_training"]
+        self.save_dev_data = None
+        self.save_output = test_args["save_output"]
         self.output = None
-        self.save_loss = test_args.save_loss
+        self.save_loss = test_args["save_loss"]
         self.mean_loss = None
-        self.batch_size = test_args.batch_size
-    def test(self, network, data):
-        print("testing")
-        network.mode(test=True)  # turn on the testing mode
-        if self.save_dev_input:
-            if self.valid_x is None:
-                valid_x, valid_y = network.prepare_input(data)
-                self.valid_x = valid_x
-                self.valid_y = valid_y
-            else:
-                valid_x = self.valid_x
-                valid_y = self.valid_y
-        else:
-            valid_x, valid_y = network.prepare_input(data)
+        self.batch_size = test_args["batch_size"]
+        self.pickle_path = test_args["pickle_path"]
+        self.iterator = None
-        # split into batches by self.batch_size
-        iterations, test_batch_generator = self.batchify(self.batch_size, valid_x, valid_y)
+        self.model = None
+        self.eval_history = []
-        batch_output = list()
-        loss_history = list()
-        # turn on the testing mode of the network
-        network.mode(test=True)
+    def test(self, network):
+        self.model = network
+        # turn on the testing mode; clean up the history
+        self.mode(network, test=True)
-        for step in range(iterations):
-            batch_x, batch_y = test_batch_generator.__next__()
+        dev_data = self.prepare_input(self.pickle_path)
+        self.iterator = iter(Batchifier(RandomSampler(dev_data), self.batch_size, drop_last=True))
+        batch_output = list()
+        num_iter = len(dev_data) // self.batch_size
-            # forward pass from test input to predicted output
-            prediction = network.data_forward(batch_x)
+        for step in range(num_iter):
+            batch_x, batch_y = self.batchify(dev_data)
-            loss = network.get_loss(prediction, batch_y)
+            prediction = self.data_forward(network, batch_x)
+            eval_results = self.evaluate(prediction, batch_y)
             if self.save_output:
-                batch_output.append(prediction.data)
+                batch_output.append(prediction)
             if self.save_loss:
-                loss_history.append(loss)
-                self.log(self.make_log(step, loss))
+                self.eval_history.append(eval_results)
-        if self.save_loss:
-            self.mean_loss = np.mean(np.array(loss_history))
-        if self.save_output:
-            self.output = self.make_output(batch_output)
     @property
     def loss(self):
         return self.mean_loss
+    def prepare_input(self, data_path):
+        """
+        Load the dev data from pickle once; later calls return the cached copy.
+        :param data_path: str, the path to the pickle data for dev
+        :return save_dev_data: list. Each entry is a sample, which is also a list of features and label(s).
+        """
+        if self.save_dev_data is None:
+            # NOTE: currently reuses the training pickle as a dev-set placeholder
+            data_dev = _pickle.load(open(data_path + "/data_train.pkl", "rb"))
+            self.save_dev_data = data_dev
+        return self.save_dev_data
     @property
     def result(self):
         return self.output
+    def batchify(self, data):
+        """
+        1. Perform batching from data and produce a batch of training data.
+        2. Add padding.
+        :param data: list. Each entry is a sample, which is also a list of features and label(s).
+            E.g.
+                [
+                    [[word_11, word_12, word_13], [label_11, label_12]],  # sample 1
+                    [[word_21, word_22, word_23], [label_21, label_22]],  # sample 2
+                    ...
+                ]
+        :return batch_x: list. Each entry is a list of features of a sample. [batch_size, max_len]
+                batch_y: list. Each entry is a list of labels of a sample. [batch_size, num_labels]
+        """
+        indices = next(self.iterator)
+        batch = [data[idx] for idx in indices]
+        batch_x = [sample[0] for sample in batch]
+        batch_y = [sample[1] for sample in batch]
+        batch_x = self.pad(batch_x)
+        return batch_x, batch_y
     @staticmethod
     def make_output(batch_outputs):
         # construct full prediction with batch outputs
         return np.concatenate(batch_outputs, axis=0)
+    @staticmethod
+    def pad(batch, fill=0):
+        """
+        Pad a batch of samples to maximum length.
+        :param batch: list of list
+        :param fill: word index to pad, default 0.
+        :return: a padded batch
+        """
+        max_length = max([len(x) for x in batch])
+        for idx, sample in enumerate(batch):
+            if len(sample) < max_length:
+                batch[idx] = sample + [fill] * (max_length - len(sample))
+        return batch
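# A quick illustration of pad() with hypothetical values:
#   BaseTester.pad([[1, 2, 3], [4, 5]])  ->  [[1, 2, 3], [4, 5, 0]]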
-    def load_config(self, args):
+    def data_forward(self, network, data):
         raise NotImplementedError
-    def load_dataset(self, args):
+    def evaluate(self, predict, truth):
         raise NotImplementedError
+    @property
+    def matrices(self):
+        raise NotImplementedError
+    def mode(self, model, test=True):
+        """TODO: combine this function with Trainer."""
+        if test:
+            model.eval()
+        else:
+            model.train()
+        self.eval_history.clear()
+class POSTester(BaseTester):
+    """
+    Tester for sequence labeling.
+    """
+    def __init__(self, test_args):
+        super(POSTester, self).__init__(test_args)
+        self.max_len = None
+        self.mask = None
+        self.batch_result = None
+    def data_forward(self, network, x):
+        """TODO: combine with Trainer.
+        :param network: the PyTorch model
+        :param x: list of list, [batch_size, max_len]
+        :return y: [batch_size, max_len, num_classes]
+        """
+        seq_len = [len(seq) for seq in x]
+        x = torch.Tensor(x).long()
+        self.batch_size = x.size(0)
+        self.max_len = x.size(1)
+        self.mask = seq_mask(seq_len, self.max_len)
+        y = network(x)
+        return y
+    def evaluate(self, predict, truth):
+        truth = torch.Tensor(truth)
+        loss, prediction = self.model.loss(predict, truth, self.mask, self.batch_size, self.max_len)
+        return loss.data
+    def matrices(self):
+        return np.mean(self.eval_history)
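# Usage sketch for POSTester (hedged: assumes POSPreprocess has already written
# data_train.pkl under pickle_path, and a `model` with SeqLabeling's interface):
#   test_args = {"save_output": True, "validate_in_training": True,
#                "save_dev_input": True, "save_loss": True,
#                "batch_size": 2, "pickle_path": "./data_for_tests"}
#   tester = POSTester(test_args)
#   tester.test(model)
#   print("mean dev loss:", tester.matrices())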
@@ -1,12 +1,12 @@
 import _pickle
 from collections import namedtuple
 import numpy as np
 import torch
 from fastNLP.action.action import Action
 from fastNLP.action.action import RandomSampler, Batchifier
-from fastNLP.action.tester import Tester
+from fastNLP.action.tester import POSTester
 from fastNLP.modules.utils import seq_mask
 class BaseTrainer(Action):
@@ -21,23 +21,29 @@ class BaseTrainer(Action):
           - grad_backward
           - get_loss
     """
-    TrainConfig = namedtuple("config", ["epochs", "validate", "batch_size", "pickle_path"])
     def __init__(self, train_args):
         """
-        training parameters
+        :param train_args: dict of (key, value) pairs
+            The base trainer requires the following keys:
+            - epochs: int, the number of epochs in training
+            - validate: bool, whether or not to validate on the dev set
+            - batch_size: int
+            - pickle_path: str, the path to pickle files for pre-processing
         """
         super(BaseTrainer, self).__init__()
-        self.n_epochs = train_args.epochs
-        self.validate = train_args.validate
-        self.batch_size = train_args.batch_size
-        self.pickle_path = train_args.pickle_path
+        self.n_epochs = train_args["epochs"]
+        self.validate = train_args["validate"]
+        self.batch_size = train_args["batch_size"]
+        self.pickle_path = train_args["pickle_path"]
         self.model = None
         self.iterator = None
         self.loss_func = None
         self.optimizer = None
     def train(self, network):
-        """General training loop.
+        """General training steps.
         :param network: a model
         The method is framework independent.
@@ -51,22 +57,27 @@ class BaseTrainer(Action):
           - update
         Subclasses must implement these methods with a specific framework.
         """
         # prepare model and data
         self.model = network
         data_train, data_dev, data_test, embedding = self.prepare_input(self.pickle_path)
-        test_args = Tester.TestConfig(save_output=True, validate_in_training=True,
-                                      save_dev_input=True, save_loss=True, batch_size=self.batch_size)
-        evaluator = Tester(test_args)
+        # define a tester over the dev data
+        valid_args = {"save_output": True, "validate_in_training": True, "save_dev_input": True,
+                      "save_loss": True, "batch_size": self.batch_size, "pickle_path": self.pickle_path}
+        validator = POSTester(valid_args)
         best_loss = 1e10
+        # main training epochs
         iterations = len(data_train) // self.batch_size
         for epoch in range(self.n_epochs):
-            self.mode(test=False)
+            # turn on network training mode; define optimizer; prepare batch iterator
+            self.mode(test=False)
+            self.define_optimizer()
+            self.iterator = iter(Batchifier(RandomSampler(data_train), self.batch_size, drop_last=True))
             # training iterations in one epoch
             for step in range(iterations):
-                batch_x, batch_y = self.batchify(self.batch_size, data_train)
+                batch_x, batch_y = self.batchify(data_train)
                 prediction = self.data_forward(network, batch_x)
@@ -77,9 +88,8 @@ class BaseTrainer(Action):
             if self.validate:
                 if data_dev is None:
                     raise RuntimeError("No validation data provided.")
-                evaluator.test(network, data_dev)
-                if evaluator.loss < best_loss:
-                    best_loss = evaluator.loss
+                validator.test(network)
+                print("[epoch {}] dev loss={:.2f}".format(epoch, validator.matrices()))
         # finish training
@@ -155,23 +165,20 @@ class BaseTrainer(Action):
         """
         raise NotImplementedError
-    def batchify(self, batch_size, data):
+    def batchify(self, data):
         """
         1. Perform batching from data and produce a batch of training data.
         2. Add padding.
-        :param batch_size: int, the size of a batch
         :param data: list. Each entry is a sample, which is also a list of features and label(s).
             E.g.
                 [
-                    [[feature_1, feature_2, feature_3], [label_1, label_2]],  # sample 1
-                    [[feature_1, feature_2, feature_3], [label_1, label_2]],  # sample 2
+                    [[word_11, word_12, word_13], [label_11, label_12]],  # sample 1
+                    [[word_21, word_22, word_23], [label_21, label_22]],  # sample 2
                     ...
                 ]
-        :return batch_x: list. Each entry is a list of features of a sample.
-                batch_y: list. Each entry is a list of labels of a sample.
+        :return batch_x: list. Each entry is a list of features of a sample. [batch_size, max_len]
+                batch_y: list. Each entry is a list of labels of a sample. [batch_size, num_labels]
         """
-        if self.iterator is None:
-            self.iterator = iter(Batchifier(RandomSampler(data), batch_size, drop_last=True))
         indices = next(self.iterator)
         batch = [data[idx] for idx in indices]
         batch_x = [sample[0] for sample in batch]
@@ -195,7 +202,9 @@ class BaseTrainer(Action):
 class ToyTrainer(BaseTrainer):
-    """A simple trainer for a PyTorch model."""
+    """
+    deprecated
+    """
     def __init__(self, train_args):
         super(ToyTrainer, self).__init__(train_args)
@@ -230,7 +239,7 @@ class ToyTrainer(BaseTrainer):
 class WordSegTrainer(BaseTrainer):
     """
-    reserve for changes
+    deprecated
     """
     def __init__(self, train_args):
@@ -301,6 +310,7 @@ class WordSegTrainer(BaseTrainer):
         self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01, momentum=0.85)
     def get_loss(self, predict, truth):
+        truth = torch.Tensor(truth)
         self._loss = torch.nn.functional.cross_entropy(predict, truth)
         return self._loss
@@ -313,8 +323,76 @@ class WordSegTrainer(BaseTrainer):
         self.optimizer.step()
+class POSTrainer(BaseTrainer):
+    """
+    Trainer for sequence modeling.
+    """
+    def __init__(self, train_args):
+        super(POSTrainer, self).__init__(train_args)
+        self.vocab_size = train_args["vocab_size"]
+        self.num_classes = train_args["num_classes"]
+        self.max_len = None
+        self.mask = None
+    def prepare_input(self, data_path):
+        """
+        TODO: load pickle files of train/dev/test and embedding.
+        """
+        data_train = _pickle.load(open(data_path + "/data_train.pkl", "rb"))
+        # NOTE: the dev set currently reuses the training pickle as a placeholder
+        data_dev = _pickle.load(open(data_path + "/data_train.pkl", "rb"))
+        # the last two return values stand in for data_test and embedding
+        return data_train, data_dev, 0, 1
+    def data_forward(self, network, x):
+        """
+        :param network: the PyTorch model
+        :param x: list of list, [batch_size, max_len]
+        :return y: [batch_size, max_len, num_classes]
+        """
+        seq_len = [len(seq) for seq in x]
+        x = torch.Tensor(x).long()
+        self.batch_size = x.size(0)
+        self.max_len = x.size(1)
+        self.mask = seq_mask(seq_len, self.max_len)
+        y = network(x)
+        return y
+    def mode(self, test=False):
+        if test:
+            self.model.eval()
+        else:
+            self.model.train()
+    def define_optimizer(self):
+        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01, momentum=0.9)
+    def grad_backward(self, loss):
+        self.model.zero_grad()
+        loss.backward()
+    def update(self):
+        self.optimizer.step()
+    def get_loss(self, predict, truth):
+        """
+        Compute loss given prediction and ground truth.
+        :param predict: prediction label vector, [batch_size, max_len, num_classes]
+        :param truth: ground truth label vector, [batch_size, max_len]
+        :return: a scalar
+        """
+        truth = torch.Tensor(truth)
+        if self.loss_func is None:
+            if hasattr(self.model, "loss"):
+                self.loss_func = self.model.loss
+            else:
+                self.define_loss()
+        loss, prediction = self.loss_func(predict, truth, self.mask, self.batch_size, self.max_len)
+        return loss
 if __name__ == "__main__":
-    train_args = BaseTrainer.TrainConfig(epochs=1, validate=False, batch_size=3, pickle_path="./")
+    train_args = {"epochs": 1, "validate": False, "batch_size": 3, "pickle_path": "./"}
     trainer = BaseTrainer(train_args)
     data_train = [[[1, 2, 3, 4], [0]]] * 10 + [[[1, 3, 5, 2], [1]]] * 10
+    # batchify() now expects self.iterator to be prepared first
+    trainer.iterator = iter(Batchifier(RandomSampler(data_train), trainer.batch_size, drop_last=True))
-    trainer.batchify(batch_size=3, data=data_train)
+    trainer.batchify(data=data_train)
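# With the toy data above, one batchify() call yields, for example:
#   batch_x = [[1, 2, 3, 4], [1, 3, 5, 2], [1, 2, 3, 4]]   # [3, 4], equal lengths, no padding needed
#   batch_y = [[0], [1], [0]]                               # [3, 1]
# (sample order depends on RandomSampler)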
@@ -15,7 +15,6 @@ class POSDatasetLoader(DatasetLoader):
     def __init__(self, data_name, data_path):
         super(POSDatasetLoader, self).__init__(data_name, data_path)
-        # self.data_set = self.load()
     def load(self):
         assert os.path.exists(self.data_path)
@@ -24,7 +23,7 @@ class POSDatasetLoader(DatasetLoader):
         return line
     def load_lines(self):
-        assert os.path.exists(self.data_path)
+        assert (os.path.exists(self.data_path))
         with open(self.data_path, "r", encoding="utf-8") as f:
             lines = f.readlines()
             return lines
@@ -46,19 +46,17 @@ class BasePreprocess(object):
 class POSPreprocess(BasePreprocess):
     """
     This class is used to preprocess POS datasets.
-    In these datasets, each line is divided by '\t'
-    The first Col is the vocabulary.
-    The second Col is the labels.
+    In these datasets, each line is divided by '\t',
+    with the first column being the word and the
+    second column being the label.
     Different sentences are divided by an empty line.
     e.g.:
-    Tom label1
-    and label2
-    Jerry label1
-    . label3
+    Hello label4
+    world label5
+    ! label3
@@ -71,11 +69,13 @@ class POSPreprocess(BasePreprocess):
         super(POSPreprocess, self).__init__(data, pickle_path)
         self.word_dict = None
         self.label_dict = None
+        self.data = data
+        self.pickle_path = pickle_path
         self.build_dict()
         self.word2id()
-        self.id2word()
+        self.vocab_size = self.id2word()
         self.class2id()
-        self.id2class()
+        self.num_classes = self.id2class()
         self.embedding()
         self.data_train()
         self.data_dev()
@@ -87,7 +87,8 @@ class POSPreprocess(BasePreprocess):
                           DEFAULT_RESERVED_LABEL[2]: 4}
         self.label_dict = {}
         for w in self.data:
-            if len(w) == 0:
+            w = w.strip()
+            if len(w) <= 1:
                 continue
             word = w.split('\t')
@@ -95,10 +96,11 @@ class POSPreprocess(BasePreprocess):
                 index = len(self.word_dict)
                 self.word_dict[word[0]] = index
-            for label in word[1:]:
-                if label not in self.label_dict:
-                    index = len(self.label_dict)
-                    self.label_dict[label] = index
+            # for label in word[1:]:
+            label = word[1]
+            if label not in self.label_dict:
+                index = len(self.label_dict)
+                self.label_dict[label] = index
     def pickle_exist(self, pickle_name):
         """
@@ -107,7 +109,7 @@ class POSPreprocess(BasePreprocess):
         """
         if not os.path.exists(self.pickle_path):
             os.makedirs(self.pickle_path)
-        file_name = self.pickle_path + pickle_name
+        file_name = os.path.join(self.pickle_path, pickle_name)
         if os.path.exists(file_name):
             return True
         else:
@@ -118,42 +120,48 @@ class POSPreprocess(BasePreprocess):
             return
         # nothing will be done if word2id.pkl exists
-        file_name = self.pickle_path + "word2id.pkl"
-        with open(file_name, "wb", encoding='utf-8') as f:
+        file_name = os.path.join(self.pickle_path, "word2id.pkl")
+        with open(file_name, "wb") as f:
             _pickle.dump(self.word_dict, f)
     def id2word(self):
         if self.pickle_exist("id2word.pkl"):
-            return
+            file_name = os.path.join(self.pickle_path, "id2word.pkl")
+            id2word_dict = _pickle.load(open(file_name, "rb"))
+            return len(id2word_dict)
         # nothing will be done if id2word.pkl exists
         id2word_dict = {}
         for word in self.word_dict:
             id2word_dict[self.word_dict[word]] = word
-        file_name = self.pickle_path + "id2word.pkl"
-        with open(file_name, "wb", encoding='utf-8') as f:
+        file_name = os.path.join(self.pickle_path, "id2word.pkl")
+        with open(file_name, "wb") as f:
             _pickle.dump(id2word_dict, f)
+        return len(id2word_dict)
     def class2id(self):
         if self.pickle_exist("class2id.pkl"):
             return
         # nothing will be done if class2id.pkl exists
-        file_name = self.pickle_path + "class2id.pkl"
-        with open(file_name, "wb", encoding='utf-8') as f:
+        file_name = os.path.join(self.pickle_path, "class2id.pkl")
+        with open(file_name, "wb") as f:
             _pickle.dump(self.label_dict, f)
     def id2class(self):
         if self.pickle_exist("id2class.pkl"):
-            return
+            file_name = os.path.join(self.pickle_path, "id2class.pkl")
+            id2class_dict = _pickle.load(open(file_name, "rb"))
+            return len(id2class_dict)
         # nothing will be done if id2class.pkl exists
         id2class_dict = {}
         for label in self.label_dict:
            id2class_dict[self.label_dict[label]] = label
-        file_name = self.pickle_path + "id2class.pkl"
-        with open(file_name, "wb", encoding='utf-8') as f:
+        file_name = os.path.join(self.pickle_path, "id2class.pkl")
+        with open(file_name, "wb") as f:
             _pickle.dump(id2class_dict, f)
+        return len(id2class_dict)
     def embedding(self):
         if self.pickle_exist("embedding.pkl"):
@@ -168,22 +176,26 @@ class POSPreprocess(BasePreprocess):
         data_train = []
         sentence = []
         for w in self.data:
-            if len(w) == 0:
+            w = w.strip()
+            if len(w) <= 1:
                 wid = []
                 lid = []
                 for i in range(len(sentence)):
                     wid.append(self.word_dict[sentence[i][0]])
                     lid.append(self.label_dict[sentence[i][1]])
                 data_train.append((wid, lid))
                 sentence = []
                 continue
             sentence.append(w.split('\t'))
-        file_name = self.pickle_path + "data_train.pkl"
-        with open(file_name, "wb", encoding='utf-8') as f:
+        file_name = os.path.join(self.pickle_path, "data_train.pkl")
+        with open(file_name, "wb") as f:
             _pickle.dump(data_train, f)
     def data_dev(self):
         pass
     def data_test(self):
         pass
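# For the docstring example above, build_dict() produces roughly:
#   word_dict  = {<reserved entries with ids 0-4>, "Hello": 5, "world": 6, "!": 7}
#   label_dict = {"label4": 0, "label5": 1, "label3": 2}
# (ids 0-4 are taken by the padding/unknown/reserved entries from DEFAULT_RESERVED_LABEL)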
@@ -3,32 +3,12 @@ import torch
 class BaseModel(torch.nn.Module):
     """Base PyTorch model for all models.
-    Three network modules presented:
-        - embedding module
-        - aggregation module
-        - output module
-    Subclasses must implement these three modules with "components".
+    To do: add some useful common features
     """
     def __init__(self):
         super(BaseModel, self).__init__()
-    def forward(self, *inputs):
-        x = self.encode(*inputs)
-        x = self.aggregation(x)
-        x = self.output(x)
-        return x
-    def encode(self, x):
-        raise NotImplementedError
-    def aggregation(self, x):
-        raise NotImplementedError
-    def output(self, x):
-        raise NotImplementedError
 class Vocabulary(object):
     """A look-up table that allows you to access `Lexeme` objects. The `Vocab`
@@ -93,3 +73,4 @@ class Token(object):
         self.doc = doc
         self.token = doc[offset]
         self.i = offset
@@ -0,0 +1,97 @@ | |||
import torch | |||
import torch.nn as nn | |||
from torch.nn import functional as F | |||
from fastNLP.models.base_model import BaseModel | |||
from fastNLP.modules.CRF import ContionalRandomField | |||
class SeqLabeling(BaseModel): | |||
""" | |||
PyTorch Network for sequence labeling | |||
""" | |||
def __init__(self, hidden_dim, | |||
rnn_num_layer, | |||
num_classes, | |||
vocab_size, | |||
word_emb_dim=100, | |||
init_emb=None, | |||
rnn_mode="gru", | |||
bi_direction=False, | |||
dropout=0.5, | |||
use_crf=True): | |||
super(SeqLabeling, self).__init__() | |||
self.Emb = nn.Embedding(vocab_size, word_emb_dim) | |||
if init_emb: | |||
self.Emb.weight = nn.Parameter(init_emb) | |||
self.num_classes = num_classes | |||
self.input_dim = word_emb_dim | |||
self.layers = rnn_num_layer | |||
self.hidden_dim = hidden_dim | |||
self.bi_direction = bi_direction | |||
self.dropout = dropout | |||
self.mode = rnn_mode | |||
if self.mode == "lstm": | |||
self.rnn = nn.LSTM(self.input_dim, self.hidden_dim, self.layers, batch_first=True, | |||
bidirectional=self.bi_direction, dropout=self.dropout) | |||
elif self.mode == "gru": | |||
self.rnn = nn.GRU(self.input_dim, self.hidden_dim, self.layers, batch_first=True, | |||
bidirectional=self.bi_direction, dropout=self.dropout) | |||
elif self.mode == "rnn": | |||
self.rnn = nn.RNN(self.input_dim, self.hidden_dim, self.layers, batch_first=True, | |||
bidirectional=self.bi_direction, dropout=self.dropout) | |||
else: | |||
raise Exception | |||
if bi_direction: | |||
self.linear = nn.Linear(self.hidden_dim * 2, self.num_classes) | |||
else: | |||
self.linear = nn.Linear(self.hidden_dim, self.num_classes) | |||
self.use_crf = use_crf | |||
if self.use_crf: | |||
self.crf = ContionalRandomField(num_classes) | |||
def forward(self, x): | |||
""" | |||
:param x: LongTensor, [batch_size, mex_len] | |||
:return y: [batch_size, tag_size, tag_size] | |||
""" | |||
x = self.Emb(x) | |||
# [batch_size, max_len, word_emb_dim] | |||
x, hidden = self.rnn(x) | |||
# [batch_size, max_len, hidden_size * direction] | |||
y = self.linear(x) | |||
# [batch_size, max_len, num_classes] | |||
return y | |||
def loss(self, x, y, mask, batch_size, max_len): | |||
""" | |||
Negative log likelihood loss. | |||
:param x: FloatTensor, [batch_size, tag_size, tag_size] | |||
:param y: LongTensor, [batch_size, max_len] | |||
:param mask: ByteTensor, [batch_size, max_len] | |||
:param batch_size: int | |||
:param max_len: int | |||
:return loss: | |||
prediction: | |||
""" | |||
x = x.float() | |||
y = y.long() | |||
mask = mask.byte() | |||
# print(x.shape, y.shape, mask.shape) | |||
if self.use_crf: | |||
total_loss = self.crf(x, y, mask) | |||
tag_seq = self.crf.viterbi_decode(x, mask) | |||
else: | |||
# error | |||
loss_function = nn.NLLLoss(ignore_index=0, size_average=False) | |||
x = x.view(batch_size * max_len, -1) | |||
score = F.log_softmax(x) | |||
total_loss = loss_function(score, y.view(batch_size * max_len)) | |||
_, tag_seq = torch.max(score) | |||
tag_seq = tag_seq.view(batch_size, max_len) | |||
return torch.mean(total_loss), tag_seq |
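# A hedged shape walk-through with toy values (assumes seq_mask from
# fastNLP.modules.utils and a working ContionalRandomField):
#   model = SeqLabeling(hidden_dim=16, rnn_num_layer=1, num_classes=5,
#                       vocab_size=100, bi_direction=True)
#   x = torch.LongTensor([[1, 2, 3, 4], [5, 6, 0, 0]])       # [2, 4]
#   y = model(x)                                             # [2, 4, 5]
#   mask = seq_mask([4, 2], 4)                               # [2, 4]
#   truth = torch.LongTensor([[1, 2, 1, 3], [2, 4, 0, 0]])   # [2, 4]
#   loss, tags = model.loss(y, truth, mask, batch_size=2, max_len=4)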
@@ -82,7 +82,7 @@ class ContionalRandomField(nn.Module):
     def _glod_score(self, feats, tags, masks):
         """
         Compute the score for the gold path.
-        :param feats: FloatTensor, batch_size x tag_size x tag_size
+        :param feats: FloatTensor, batch_size x max_len x tag_size
         :param tags: LongTensor, batch_size x max_len
         :param masks: ByteTensor, batch_size x max_len
         :return: FloatTensor, batch_size
@@ -118,7 +118,7 @@ class ContionalRandomField(nn.Module):
     def forward(self, feats, tags, masks):
         """
         Calculate the neg log likelihood
-        :param feats: FloatTensor, batch_size x tag_size x tag_size
+        :param feats: FloatTensor, batch_size x max_len x tag_size
         :param tags: LongTensor, batch_size x max_len
         :param masks: ByteTensor, batch_size x max_len
         :return: FloatTensor, batch_size
@@ -1,12 +1,13 @@
-import torch
-import torch.nn as nn
-import encoder
-import time
+import aggregation
+import dataloader
+import embedding
+import encoder
+import predict
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import time
-import dataloader
 WORD_NUM = 357361
 WORD_SIZE = 100
@@ -16,6 +17,30 @@ R = 10
 MLP_HIDDEN = 2000
 CLASSES_NUM = 5
+from fastNLP.models.base_model import BaseModel
+from fastNLP.action.trainer import BaseTrainer
+class MyNet(BaseModel):
+    def __init__(self):
+        super(MyNet, self).__init__()
+        self.embedding = embedding.Lookuptable(WORD_NUM, WORD_SIZE)
+        self.encoder = encoder.Lstm(WORD_SIZE, HIDDEN_SIZE, 1, 0.5, True)
+        self.aggregation = aggregation.Selfattention(2 * HIDDEN_SIZE, D_A, R)
+        self.predict = predict.MLP(R * HIDDEN_SIZE * 2, MLP_HIDDEN, CLASSES_NUM)
+        self.penalty = None
+    def encode(self, x):
+        # delegate to the encoder module (not this method itself)
+        return self.encoder(self.embedding(x))
+    def aggregate(self, x):
+        x, self.penalty = self.aggregation(x)
+        return x
+    def decode(self, x):
+        return [self.predict(x), self.penalty]
 class Net(nn.Module):
     """
     A model for sentiment analysis using lstm and self-attention
@@ -34,6 +59,19 @@ class Net(nn.Module):
         x = self.predict(x)
         return x, penalty
+class MyTrainer(BaseTrainer):
+    def __init__(self, args):
+        super(MyTrainer, self).__init__(args)
+        self.optimizer = None
+    def define_optimizer(self):
+        self.optimizer = optim.SGD(self.model.parameters(), lr=0.01, momentum=0.9)
+    def define_loss(self):
+        self.loss_func = nn.CrossEntropyLoss()
 def train(model_dict=None, using_cuda=True, learning_rate=0.06,
           momentum=0.3, batch_size=32, epochs=5, coef=1.0, interval=10):
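# Hedged sketch of driving MyTrainer (keys mirror what BaseTrainer.__init__
# reads; prepare_input() and the remaining hooks would still need implementing):
#   args = {"epochs": 5, "validate": False, "batch_size": 32, "pickle_path": "./"}
#   MyTrainer(args).train(MyNet())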
""" | |||
@@ -7,3 +7,9 @@ def mask_softmax(matrix, mask): | |||
else: | |||
raise NotImplementedError | |||
return result | |||
def seq_mask(seq_len, max_len): | |||
mask = [torch.ge(torch.LongTensor(seq_len), i + 1) for i in range(max_len)] | |||
mask = torch.stack(mask, 1) | |||
return mask |
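# Example: seq_mask([2, 3], 3) stacks the per-position comparisons into
#   tensor([[1, 1, 0],
#           [1, 1, 1]], dtype=torch.uint8)   # row i masks sequence i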
@@ -1,3 +1,3 @@
-numpy==1.14.2
+numpy>=1.14.2
 torch==0.4.0
-torchvision==0.1.8
+torchvision>=0.1.8
@@ -0,0 +1,67 @@
+迈 B-v
+向 E-v
+充 B-v
+满 E-v
+希 B-n
+望 E-n
+的 S-u
+新 S-a
+世 B-n
+纪 E-n
+— B-w
+— E-w
+一 B-t
+九 M-t
+九 M-t
+八 M-t
+年 E-t
+新 B-t
+年 E-t
+讲 B-n
+话 E-n
+( S-w
+附 S-v
+图 B-n
+片 E-n
+1 S-m
+张 S-q
+) S-w
+中 B-nt
+共 M-nt
+中 M-nt
+央 E-nt
+总 B-n
+书 M-n
+记 E-n
+、 S-w
+国 B-n
+家 E-n
+主 B-n
+席 E-n
+江 B-nr
+泽 M-nr
+民 E-nr
+( S-w
+一 B-t
+九 M-t
+九 M-t
+七 M-t
+年 E-t
+十 B-t
+二 M-t
+月 E-t
+三 B-t
+十 M-t
+一 M-t
+日 E-t
+) S-w
+1 B-t
+2 M-t
+月 E-t
+3 B-t
+1 M-t
+日 E-t
+, S-w
@@ -0,0 +1,35 @@
+import sys
+sys.path.append("..")
+from fastNLP.action.trainer import POSTrainer
+from fastNLP.loader.dataset_loader import POSDatasetLoader
+from fastNLP.loader.preprocess import POSPreprocess
+from fastNLP.models.sequence_modeling import SeqLabeling
+data_name = "people.txt"
+data_path = "data_for_tests/people.txt"
+pickle_path = "data_for_tests"
+if __name__ == "__main__":
+    # Data Loader
+    pos = POSDatasetLoader(data_name, data_path)
+    train_data = pos.load_lines()
+    # Preprocessor
+    p = POSPreprocess(train_data, pickle_path)
+    vocab_size = p.vocab_size
+    num_classes = p.num_classes
+    # Trainer
+    train_args = {"epochs": 20, "batch_size": 1, "num_classes": num_classes,
+                  "vocab_size": vocab_size, "pickle_path": pickle_path, "validate": True}
+    trainer = POSTrainer(train_args)
+    # Model
+    model = SeqLabeling(100, 1, num_classes, vocab_size, bi_direction=True)
+    # Start training
+    trainer.train(model)
+    print("Training finished!")