diff --git a/fastNLP/action/tester.py b/fastNLP/action/tester.py
index 7f660bb0..9d32ec40 100644
--- a/fastNLP/action/tester.py
+++ b/fastNLP/action/tester.py
@@ -1,87 +1,154 @@
-from collections import namedtuple
+import _pickle
 
 import numpy as np
+import torch
 
 from fastNLP.action.action import Action
+from fastNLP.action.action import RandomSampler, Batchifier
+from fastNLP.modules.utils import seq_mask
 
 
-class Tester(Action):
+class BaseTester(Action):
     """docstring for Tester"""
-    TestConfig = namedtuple("config", ["validate_in_training", "save_dev_input", "save_output",
-                                       "save_loss", "batch_size"])
-
     def __init__(self, test_args):
         """
-        :param test_args: named tuple
+        :param test_args: dict of arguments; requires the keys "validate_in_training",
+            "save_output", "save_loss", "batch_size", and "pickle_path"
         """
-        super(Tester, self).__init__()
-        self.validate_in_training = test_args.validate_in_training
-        self.save_dev_input = test_args.save_dev_input
-        self.valid_x = None
-        self.valid_y = None
-        self.save_output = test_args.save_output
+        super(BaseTester, self).__init__()
+        self.validate_in_training = test_args["validate_in_training"]
+        self.save_dev_data = None
+        self.save_output = test_args["save_output"]
         self.output = None
-        self.save_loss = test_args.save_loss
+        self.save_loss = test_args["save_loss"]
         self.mean_loss = None
-        self.batch_size = test_args.batch_size
-
-    def test(self, network, data):
-        print("testing")
-        network.mode(test=True)  # turn on the testing mode
-        if self.save_dev_input:
-            if self.valid_x is None:
-                valid_x, valid_y = network.prepare_input(data)
-                self.valid_x = valid_x
-                self.valid_y = valid_y
-            else:
-                valid_x = self.valid_x
-                valid_y = self.valid_y
-        else:
-            valid_x, valid_y = network.prepare_input(data)
+        self.batch_size = test_args["batch_size"]
+        self.pickle_path = test_args["pickle_path"]
+        self.iterator = None
 
-        # split into batches by self.batch_size
-        iterations, test_batch_generator = self.batchify(self.batch_size, valid_x, valid_y)
+        self.model = None
+        self.eval_history = []
 
-        batch_output = list()
-        loss_history = list()
-        # turn on the testing mode of the network
-        network.mode(test=True)
+    def test(self, network):
+        self.model = network
+
+        # turn on the testing mode; clean up the history
+        self.mode(network, test=True)
 
-        for step in range(iterations):
-            batch_x, batch_y = test_batch_generator.__next__()
+        dev_data = self.prepare_input(self.pickle_path)
+
+        self.iterator = iter(Batchifier(RandomSampler(dev_data), self.batch_size, drop_last=True))
+
+        batch_output = list()
+        num_iter = len(dev_data) // self.batch_size
 
-            # forward pass from test input to predicted output
-            prediction = network.data_forward(batch_x)
+        for step in range(num_iter):
+            batch_x, batch_y = self.batchify(dev_data)
 
-            loss = network.get_loss(prediction, batch_y)
+            prediction = self.data_forward(network, batch_x)
+            eval_results = self.evaluate(prediction, batch_y)
 
             if self.save_output:
-                batch_output.append(prediction.data)
+                batch_output.append(prediction)
             if self.save_loss:
-                loss_history.append(loss)
-                self.log(self.make_log(step, loss))
+                self.eval_history.append(eval_results)
 
-        if self.save_loss:
-            self.mean_loss = np.mean(np.array(loss_history))
-        if self.save_output:
-            self.output = self.make_output(batch_output)
-
-    @property
-    def loss(self):
-        return self.mean_loss
+    def prepare_input(self, data_path):
+        """
+        Save the dev data the first time it is loaded, so that later calls return it directly.
+        :param data_path: str, the path to the pickled dev data
+        :return save_dev_data: list. Each entry is a sample, which is also a list of features and label(s).
+ """ + if self.save_dev_data is None: + data_dev = _pickle.load(open(data_path + "/data_train.pkl", "rb")) + self.save_dev_data = data_dev + return self.save_dev_data - @property - def result(self): - return self.output + def batchify(self, data): + """ + 1. Perform batching from data and produce a batch of training data. + 2. Add padding. + :param data: list. Each entry is a sample, which is also a list of features and label(s). + E.g. + [ + [[word_11, word_12, word_13], [label_11. label_12]], # sample 1 + [[word_21, word_22, word_23], [label_21. label_22]], # sample 2 + ... + ] + :return batch_x: list. Each entry is a list of features of a sample. [batch_size, max_len] + batch_y: list. Each entry is a list of labels of a sample. [batch_size, num_labels] + """ + indices = next(self.iterator) + batch = [data[idx] for idx in indices] + batch_x = [sample[0] for sample in batch] + batch_y = [sample[1] for sample in batch] + batch_x = self.pad(batch_x) + return batch_x, batch_y @staticmethod - def make_output(batch_outputs): - # construct full prediction with batch outputs - return np.concatenate(batch_outputs, axis=0) + def pad(batch, fill=0): + """ + Pad a batch of samples to maximum length. + :param batch: list of list + :param fill: word index to pad, default 0. + :return: a padded batch + """ + max_length = max([len(x) for x in batch]) + for idx, sample in enumerate(batch): + if len(sample) < max_length: + batch[idx] = sample + [fill * (max_length - len(sample))] + return batch - def load_config(self, args): + def data_forward(self, network, data): raise NotImplementedError - def load_dataset(self, args): + def evaluate(self, predict, truth): raise NotImplementedError + + @property + def matrices(self): + raise NotImplementedError + + def mode(self, model, test=True): + """To do: combine this function with Trainer ?? """ + if test: + model.eval() + else: + model.train() + self.eval_history.clear() + + +class POSTester(BaseTester): + """ + Tester for sequence labeling. 
+ """ + + def __init__(self, test_args): + super(POSTester, self).__init__(test_args) + self.max_len = None + self.mask = None + self.batch_result = None + + def data_forward(self, network, x): + """To Do: combine with Trainer + + :param network: the PyTorch model + :param x: list of list, [batch_size, max_len] + :return y: [batch_size, num_classes] + """ + seq_len = [len(seq) for seq in x] + x = torch.Tensor(x).long() + self.batch_size = x.size(0) + self.max_len = x.size(1) + self.mask = seq_mask(seq_len, self.max_len) + y = network(x) + return y + + def evaluate(self, predict, truth): + truth = torch.Tensor(truth) + loss, prediction = self.model.loss(predict, truth, self.mask, self.batch_size, self.max_len) + return loss.data + + def matrices(self): + return np.mean(self.eval_history) diff --git a/fastNLP/action/trainer.py b/fastNLP/action/trainer.py index 437ab7d2..0ab9fee7 100644 --- a/fastNLP/action/trainer.py +++ b/fastNLP/action/trainer.py @@ -1,12 +1,12 @@ import _pickle -from collections import namedtuple import numpy as np import torch from fastNLP.action.action import Action from fastNLP.action.action import RandomSampler, Batchifier -from fastNLP.action.tester import Tester +from fastNLP.action.tester import POSTester +from fastNLP.modules.utils import seq_mask class BaseTrainer(Action): @@ -21,23 +21,29 @@ class BaseTrainer(Action): - grad_backward - get_loss """ - TrainConfig = namedtuple("config", ["epochs", "validate", "batch_size", "pickle_path"]) def __init__(self, train_args): """ - training parameters + :param train_args: dict of (key, value) + + The base trainer requires the following keys: + - epochs: int, the number of epochs in training + - validate: bool, whether or not to validate on dev set + - batch_size: int + - pickle_path: str, the path to pickle files for pre-processing """ super(BaseTrainer, self).__init__() - self.n_epochs = train_args.epochs - self.validate = train_args.validate - self.batch_size = train_args.batch_size - self.pickle_path = train_args.pickle_path + self.n_epochs = train_args["epochs"] + self.validate = train_args["validate"] + self.batch_size = train_args["batch_size"] + self.pickle_path = train_args["pickle_path"] self.model = None self.iterator = None self.loss_func = None + self.optimizer = None def train(self, network): - """General training loop. + """General Training Steps :param network: a model The method is framework independent. @@ -51,22 +57,27 @@ class BaseTrainer(Action): - update Subclasses must implement these methods with a specific framework. 
""" + # prepare model and data self.model = network data_train, data_dev, data_test, embedding = self.prepare_input(self.pickle_path) - test_args = Tester.TestConfig(save_output=True, validate_in_training=True, - save_dev_input=True, save_loss=True, batch_size=self.batch_size) - evaluator = Tester(test_args) + # define tester over dev data + valid_args = {"save_output": True, "validate_in_training": True, "save_dev_input": True, + "save_loss": True, "batch_size": self.batch_size, "pickle_path": self.pickle_path} + validator = POSTester(valid_args) - best_loss = 1e10 + # main training epochs iterations = len(data_train) // self.batch_size - for epoch in range(self.n_epochs): - self.mode(test=False) + # turn on network training mode; define optimizer; prepare batch iterator + self.mode(test=False) self.define_optimizer() + self.iterator = iter(Batchifier(RandomSampler(data_train), self.batch_size, drop_last=True)) + + # training iterations in one epoch for step in range(iterations): - batch_x, batch_y = self.batchify(self.batch_size, data_train) + batch_x, batch_y = self.batchify(data_train) prediction = self.data_forward(network, batch_x) @@ -77,9 +88,8 @@ class BaseTrainer(Action): if self.validate: if data_dev is None: raise RuntimeError("No validation data provided.") - evaluator.test(network, data_dev) - if evaluator.loss < best_loss: - best_loss = evaluator.loss + validator.test(network) + print("[epoch {}] dev loss={:.2f}".format(epoch, validator.matrices())) # finish training @@ -155,23 +165,20 @@ class BaseTrainer(Action): """ raise NotImplementedError - def batchify(self, batch_size, data): + def batchify(self, data): """ 1. Perform batching from data and produce a batch of training data. 2. Add padding. - :param batch_size: int, the size of a batch :param data: list. Each entry is a sample, which is also a list of features and label(s). E.g. [ - [[feature_1, feature_2, feature_3], [label_1. label_2]], # sample 1 - [[feature_1, feature_2, feature_3], [label_1. label_2]], # sample 2 + [[word_11, word_12, word_13], [label_11. label_12]], # sample 1 + [[word_21, word_22, word_23], [label_21. label_22]], # sample 2 ... ] - :return batch_x: list. Each entry is a list of features of a sample. - batch_y: list. Each entry is a list of labels of a sample. + :return batch_x: list. Each entry is a list of features of a sample. [batch_size, max_len] + batch_y: list. Each entry is a list of labels of a sample. 
         """
-        if self.iterator is None:
-            self.iterator = iter(Batchifier(RandomSampler(data), batch_size, drop_last=True))
         indices = next(self.iterator)
         batch = [data[idx] for idx in indices]
         batch_x = [sample[0] for sample in batch]
@@ -195,7 +202,9 @@ class BaseTrainer(Action):
 
 
 class ToyTrainer(BaseTrainer):
-    """A simple trainer for a PyTorch model."""
+    """
+    deprecated
+    """
 
     def __init__(self, train_args):
         super(ToyTrainer, self).__init__(train_args)
@@ -230,7 +239,7 @@ class ToyTrainer(BaseTrainer):
 
 class WordSegTrainer(BaseTrainer):
     """
-        reserve for changes
+        deprecated
     """
 
     def __init__(self, train_args):
@@ -301,6 +310,7 @@ class WordSegTrainer(BaseTrainer):
                                          lr=0.01, momentum=0.85)
 
     def get_loss(self, predict, truth):
+        truth = torch.Tensor(truth)
         self._loss = torch.nn.CrossEntropyLoss(predict, truth)
         return self._loss
 
@@ -313,8 +323,76 @@ class WordSegTrainer(BaseTrainer):
         self.optimizer.step()
 
 
+class POSTrainer(BaseTrainer):
+    """
+    Trainer for sequence labeling.
+    """
+    def __init__(self, train_args):
+        super(POSTrainer, self).__init__(train_args)
+        self.vocab_size = train_args["vocab_size"]
+        self.num_classes = train_args["num_classes"]
+        self.max_len = None
+        self.mask = None
+
+    def prepare_input(self, data_path):
+        """
+        TODO: load the pickled train/dev/test sets and the embedding.
+        """
+        data_train = _pickle.load(open(data_path + "/data_train.pkl", "rb"))
+        # NOTE: no separate dev pickle is dumped yet, so dev also reuses data_train.pkl
+        data_dev = _pickle.load(open(data_path + "/data_train.pkl", "rb"))
+        return data_train, data_dev, 0, 1
+
+    def data_forward(self, network, x):
+        """
+        :param network: the PyTorch model
+        :param x: list of list, [batch_size, max_len]
+        :return y: [batch_size, max_len, num_classes]
+        """
+        seq_len = [len(seq) for seq in x]
+        x = torch.Tensor(x).long()
+        self.batch_size = x.size(0)
+        self.max_len = x.size(1)
+        self.mask = seq_mask(seq_len, self.max_len)
+        y = network(x)
+        return y
+
+    def mode(self, test=False):
+        if test:
+            self.model.eval()
+        else:
+            self.model.train()
+
+    def define_optimizer(self):
+        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01, momentum=0.9)
+
+    def grad_backward(self, loss):
+        self.model.zero_grad()
+        loss.backward()
+
+    def update(self):
+        self.optimizer.step()
+
+    def get_loss(self, predict, truth):
+        """
+        Compute loss given prediction and ground truth.
+        :param predict: prediction label vector, [batch_size, max_len, num_classes]
+        :param truth: ground truth label vector, [batch_size, max_len]
+        :return: a scalar
+        """
+        truth = torch.Tensor(truth)
+        if self.loss_func is None:
+            if hasattr(self.model, "loss"):
+                self.loss_func = self.model.loss
+            else:
+                self.define_loss()
+        loss, prediction = self.loss_func(predict, truth, self.mask, self.batch_size, self.max_len)
+        return loss
+
+
 if __name__ == "__name__":
-    train_args = BaseTrainer.TrainConfig(epochs=1, validate=False, batch_size=3, pickle_path="./")
+    train_args = {"epochs": 1, "validate": False, "batch_size": 3, "pickle_path": "./"}
     trainer = BaseTrainer(train_args)
     data_train = [[[1, 2, 3, 4], [0]] * 10] + [[[1, 3, 5, 2], [1]] * 10]
-    trainer.batchify(batch_size=3, data=data_train)
+    trainer.batchify(data=data_train)
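With the namedtuple configs replaced by plain dicts, a `POSTrainer` is configured as below — a minimal sketch with toy sizes; `vocab_size` and `num_classes` normally come from `POSPreprocess`, as in the test script at the end of this patch:

    train_args = {"epochs": 5, "validate": True, "batch_size": 32, "pickle_path": "./data_for_tests",
                  "vocab_size": 100, "num_classes": 10}
    trainer = POSTrainer(train_args)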
diff --git a/fastNLP/loader/dataset_loader.py b/fastNLP/loader/dataset_loader.py
index 7132eb3b..d57a48db 100644
--- a/fastNLP/loader/dataset_loader.py
+++ b/fastNLP/loader/dataset_loader.py
@@ -15,7 +15,6 @@ class POSDatasetLoader(DatasetLoader):
 
     def __init__(self, data_name, data_path):
         super(POSDatasetLoader, self).__init__(data_name, data_path)
-        # self.data_set = self.load()
 
     def load(self):
         assert os.path.exists(self.data_path)
@@ -24,7 +23,7 @@ class POSDatasetLoader(DatasetLoader):
         return line
 
     def load_lines(self):
-        assert os.path.exists(self.data_path)
+        assert (os.path.exists(self.data_path))
         with open(self.data_path, "r", encoding="utf-8") as f:
             lines = f.readlines()
         return lines
diff --git a/fastNLP/loader/preprocess.py b/fastNLP/loader/preprocess.py
index b8d88c35..8b9c6d88 100644
--- a/fastNLP/loader/preprocess.py
+++ b/fastNLP/loader/preprocess.py
@@ -46,19 +46,17 @@ class BasePreprocess(object):
 
 
 class POSPreprocess(BasePreprocess):
-
     """
     This class are used to preprocess the pos datasets.
-    In these datasets, each line is divided by '\t'
-    The first Col is the vocabulary.
-    The second Col is the labels.
+    In these datasets, each line is divided by '\t':
+    the first column is the word and the second
+    column is the label.
     Different sentence are divided by an empty line.
     e.g:
     Tom label1
     and label2
     Jerry   label1
     .   label3
-
     Hello label4
     world label5
     !   label3
@@ -71,11 +69,13 @@ class POSPreprocess(BasePreprocess):
         super(POSPreprocess, self).__init__(data, pickle_path)
         self.word_dict = None
         self.label_dict = None
+        self.data = data
+        self.pickle_path = pickle_path
         self.build_dict()
         self.word2id()
-        self.id2word()
+        self.vocab_size = self.id2word()
         self.class2id()
-        self.id2class()
+        self.num_classes = self.id2class()
         self.embedding()
         self.data_train()
         self.data_dev()
@@ -87,7 +87,8 @@ class POSPreprocess(BasePreprocess):
                                DEFAULT_RESERVED_LABEL[2]: 4}
         self.label_dict = {}
         for w in self.data:
-            if len(w) == 0:
+            w = w.strip()
+            if len(w) <= 1:
                 continue
 
             word = w.split('\t')
@@ -95,10 +96,11 @@ class POSPreprocess(BasePreprocess):
                 index = len(self.word_dict)
                 self.word_dict[word[0]] = index
 
-            for label in word[1:]:
-                if label not in self.label_dict:
-                    index = len(self.label_dict)
-                    self.label_dict[label] = index
+            # for label in word[1:]:
+            label = word[1]
+            if label not in self.label_dict:
+                index = len(self.label_dict)
+                self.label_dict[label] = index
 
     def pickle_exist(self, pickle_name):
         """
         :param pickle_name:
@@ -107,7 +109,7 @@ class POSPreprocess(BasePreprocess):
         """
         if not os.path.exists(self.pickle_path):
             os.makedirs(self.pickle_path)
-        file_name = self.pickle_path + pickle_name
+        file_name = os.path.join(self.pickle_path, pickle_name)
         if os.path.exists(file_name):
             return True
         else:
             return False
 
     def word2id(self):
         if self.pickle_exist("word2id.pkl"):
@@ -118,42 +120,48 @@ class POSPreprocess(BasePreprocess):
             return
         # nothing will be done if word2id.pkl exists
 
-        file_name = self.pickle_path + "word2id.pkl"
-        with open(file_name, "wb", encoding='utf-8') as f:
+        file_name = os.path.join(self.pickle_path, "word2id.pkl")
+        with open(file_name, "wb") as f:
             _pickle.dump(self.word_dict, f)
 
     def id2word(self):
         if self.pickle_exist("id2word.pkl"):
-            return
+            file_name = os.path.join(self.pickle_path, "id2word.pkl")
+            id2word_dict = _pickle.load(open(file_name, "rb"))
+            return len(id2word_dict)
         # nothing will be done if id2word.pkl exists
 
         id2word_dict = {}
         for word in self.word_dict:
             id2word_dict[self.word_dict[word]] = word
-        file_name = self.pickle_path + "id2word.pkl"
-        with open(file_name, "wb", encoding='utf-8') as f:
+        file_name = os.path.join(self.pickle_path, "id2word.pkl")
+        with open(file_name, "wb") as f:
             _pickle.dump(id2word_dict, f)
+        return len(id2word_dict)
 
     def class2id(self):
         if self.pickle_exist("class2id.pkl"):
             return
         # nothing will be done if class2id.pkl exists
 
-        file_name = self.pickle_path + "class2id.pkl"
-        with open(file_name, "wb", encoding='utf-8') as f:
+        file_name = os.path.join(self.pickle_path, "class2id.pkl")
+        with open(file_name, "wb") as f:
             _pickle.dump(self.label_dict, f)
 
     def id2class(self):
         if self.pickle_exist("id2class.pkl"):
-            return
+            file_name = os.path.join(self.pickle_path, "id2class.pkl")
+            id2class_dict = _pickle.load(open(file_name, "rb"))
+            return len(id2class_dict)
         # nothing will be done if id2class.pkl exists
 
         id2class_dict = {}
         for label in self.label_dict:
             id2class_dict[self.label_dict[label]] = label
-        file_name = self.pickle_path + "id2class.pkl"
-        with open(file_name, "wb", encoding='utf-8') as f:
+        file_name = os.path.join(self.pickle_path, "id2class.pkl")
+        with open(file_name, "wb") as f:
             _pickle.dump(id2class_dict, f)
+        return len(id2class_dict)
 
     def embedding(self):
         if self.pickle_exist("embedding.pkl"):
@@ -168,22 +176,26 @@ class POSPreprocess(BasePreprocess):
         data_train = []
         sentence = []
         for w in self.data:
-            if len(w) == 0:
+            w = w.strip()
+            if len(w) <= 1:
                 wid = []
                 lid = []
                 for i in range(len(sentence)):
                     wid.append(self.word_dict[sentence[i][0]])
                     lid.append(self.label_dict[sentence[i][1]])
                 data_train.append((wid, lid))
                 sentence = []
+                continue
             sentence.append(w.split('\t'))
 
-        file_name = self.pickle_path + "data_train.pkl"
-        with open(file_name, "wb", encoding='utf-8') as f:
+        file_name = os.path.join(self.pickle_path, "data_train.pkl")
+        with open(file_name, "wb") as f:
             _pickle.dump(data_train, f)
 
     def data_dev(self):
         pass
 
     def data_test(self):
-        pass
+        pass
\ No newline at end of file
diff --git a/fastNLP/models/base_model.py b/fastNLP/models/base_model.py
index 9249e2e3..24dfdb85 100644
--- a/fastNLP/models/base_model.py
+++ b/fastNLP/models/base_model.py
@@ -3,32 +3,12 @@ import torch
 
 class BaseModel(torch.nn.Module):
     """Base PyTorch model for all models.
-    Three network modules presented:
-    - embedding module
-    - aggregation module
-    - output module
-    Subclasses must implement these three modules with "components".
+    TODO: add some useful common features
     """
 
     def __init__(self):
         super(BaseModel, self).__init__()
 
-    def forward(self, *inputs):
-        x = self.encode(*inputs)
-        x = self.aggregation(x)
-        x = self.output(x)
-        return x
-
-    def encode(self, x):
-        raise NotImplementedError
-
-    def aggregation(self, x):
-        raise NotImplementedError
-
-    def output(self, x):
-        raise NotImplementedError
-
 
 class Vocabulary(object):
     """A look-up table that allows you to access `Lexeme` objects. The `Vocab`
@@ -93,3 +73,4 @@ class Token(object):
         self.doc = doc
         self.token = doc[offset]
         self.i = offset
+
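For reference, the sample format that `POSPreprocess.data_train` pickles, and that `batchify` in the trainer and tester consumes — one `(wid, lid)` pair of word-index and label-index lists per sentence (toy indices, not taken from the patch):

    data_train = [
        ([2, 17, 5], [0, 3, 1]),  # sentence 1: three words, three tag indices
        ([8, 9], [4, 2]),         # sentence 2
    ]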
diff --git a/fastNLP/models/sequence_modeling.py b/fastNLP/models/sequence_modeling.py
new file mode 100644
index 00000000..80d13cf3
--- /dev/null
+++ b/fastNLP/models/sequence_modeling.py
@@ -0,0 +1,97 @@
+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+
+from fastNLP.models.base_model import BaseModel
+from fastNLP.modules.CRF import ContionalRandomField
+
+
+class SeqLabeling(BaseModel):
+    """
+    PyTorch network for sequence labeling.
+    """
+
+    def __init__(self, hidden_dim,
+                 rnn_num_layer,
+                 num_classes,
+                 vocab_size,
+                 word_emb_dim=100,
+                 init_emb=None,
+                 rnn_mode="gru",
+                 bi_direction=False,
+                 dropout=0.5,
+                 use_crf=True):
+        super(SeqLabeling, self).__init__()
+
+        self.Emb = nn.Embedding(vocab_size, word_emb_dim)
+        if init_emb:
+            self.Emb.weight = nn.Parameter(init_emb)
+
+        self.num_classes = num_classes
+        self.input_dim = word_emb_dim
+        self.layers = rnn_num_layer
+        self.hidden_dim = hidden_dim
+        self.bi_direction = bi_direction
+        self.dropout = dropout
+        self.mode = rnn_mode
+
+        if self.mode == "lstm":
+            self.rnn = nn.LSTM(self.input_dim, self.hidden_dim, self.layers, batch_first=True,
+                               bidirectional=self.bi_direction, dropout=self.dropout)
+        elif self.mode == "gru":
+            self.rnn = nn.GRU(self.input_dim, self.hidden_dim, self.layers, batch_first=True,
+                              bidirectional=self.bi_direction, dropout=self.dropout)
+        elif self.mode == "rnn":
+            self.rnn = nn.RNN(self.input_dim, self.hidden_dim, self.layers, batch_first=True,
+                              bidirectional=self.bi_direction, dropout=self.dropout)
+        else:
+            raise ValueError("unsupported rnn_mode: %s" % rnn_mode)
+        if bi_direction:
+            self.linear = nn.Linear(self.hidden_dim * 2, self.num_classes)
+        else:
+            self.linear = nn.Linear(self.hidden_dim, self.num_classes)
+        self.use_crf = use_crf
+        if self.use_crf:
+            self.crf = ContionalRandomField(num_classes)
+
+    def forward(self, x):
+        """
+        :param x: LongTensor, [batch_size, max_len]
+        :return y: [batch_size, max_len, num_classes]
+        """
+        x = self.Emb(x)
+        # [batch_size, max_len, word_emb_dim]
+        x, hidden = self.rnn(x)
+        # [batch_size, max_len, hidden_size * direction]
+        y = self.linear(x)
+        # [batch_size, max_len, num_classes]
+        return y
+
+    def loss(self, x, y, mask, batch_size, max_len):
+        """
+        Negative log likelihood loss.
+        :param x: FloatTensor, [batch_size, max_len, tag_size]
+        :param y: LongTensor, [batch_size, max_len]
+        :param mask: ByteTensor, [batch_size, max_len]
+        :param batch_size: int
+        :param max_len: int
+        :return loss: a scalar Tensor
+                prediction: LongTensor, [batch_size, max_len]
+        """
+        x = x.float()
+        y = y.long()
+        mask = mask.byte()
+
+        if self.use_crf:
+            total_loss = self.crf(x, y, mask)
+            tag_seq = self.crf.viterbi_decode(x, mask)
+        else:
+            loss_function = nn.NLLLoss(ignore_index=0, size_average=False)
+            x = x.view(batch_size * max_len, -1)
+            score = F.log_softmax(x, dim=1)
+            total_loss = loss_function(score, y.view(batch_size * max_len))
+            _, tag_seq = torch.max(score, dim=1)
+            tag_seq = tag_seq.view(batch_size, max_len)
+        return torch.mean(total_loss), tag_seq
diff --git a/fastNLP/modules/CRF.py b/fastNLP/modules/CRF.py
index 6361b93d..96c84dca 100644
--- a/fastNLP/modules/CRF.py
+++ b/fastNLP/modules/CRF.py
@@ -82,7 +82,7 @@ class ContionalRandomField(nn.Module):
     def _glod_score(self, feats, tags, masks):
         """
         Compute the score for the gold path.
-        :param feats: FloatTensor, batch_size x tag_size x tag_size
+        :param feats: FloatTensor, batch_size x max_len x tag_size
         :param tags: LongTensor, batch_size x max_len
         :param masks: ByteTensor, batch_size x max_len
         :return: FloatTensor, batch_size
@@ -118,7 +118,7 @@ class ContionalRandomField(nn.Module):
     def forward(self, feats, tags, masks):
         """
         Calculate the neg log likelihood
-        :param feats: FloatTensor, batch_size x tag_size x tag_size
+        :param feats: FloatTensor, batch_size x max_len x tag_size
         :param tags: LongTensor, batch_size x max_len
         :param masks: ByteTensor, batch_size x max_len
         :return: FloatTensor, batch_size
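The shape contract tying `SeqLabeling`, `seq_mask`, and the CRF together, as a hedged sketch (toy sizes; assumes `ContionalRandomField` behaves as its docstrings above describe):

    model = SeqLabeling(hidden_dim=50, rnn_num_layer=1, num_classes=5, vocab_size=100)
    x = torch.LongTensor([[2, 4, 0], [7, 1, 3]])       # [batch_size=2, max_len=3], 0 = padding
    y = model(x)                                       # [2, 3, 5] = [batch_size, max_len, num_classes]
    mask = seq_mask([2, 3], 3)                         # [2, 3], 1 where a position is not padding
    truth = torch.LongTensor([[1, 2, 0], [3, 4, 1]])   # [batch_size, max_len]
    loss, tag_seq = model.loss(y, truth, mask, batch_size=2, max_len=3)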
diff --git a/fastNLP/modules/prototype/example.py b/fastNLP/modules/prototype/example.py
index a19898c6..d23a0ec2 100644
--- a/fastNLP/modules/prototype/example.py
+++ b/fastNLP/modules/prototype/example.py
@@ -1,12 +1,13 @@
-import torch
-import torch.nn as nn
-import encoder
+import time
+
 import aggregation
+import dataloader
 import embedding
+import encoder
 import predict
+import torch
+import torch.nn as nn
 import torch.optim as optim
-import time
-import dataloader
 
 WORD_NUM = 357361
 WORD_SIZE = 100
@@ -16,6 +17,30 @@
 MLP_HIDDEN = 2000
 CLASSES_NUM = 5
 
+from fastNLP.models.base_model import BaseModel
+from fastNLP.action.trainer import BaseTrainer
+
+
+class MyNet(BaseModel):
+    def __init__(self):
+        super(MyNet, self).__init__()
+        self.embedding = embedding.Lookuptable(WORD_NUM, WORD_SIZE)
+        self.encoder = encoder.Lstm(WORD_SIZE, HIDDEN_SIZE, 1, 0.5, True)
+        self.aggregation = aggregation.Selfattention(2 * HIDDEN_SIZE, D_A, R)
+        self.predict = predict.MLP(R * HIDDEN_SIZE * 2, MLP_HIDDEN, CLASSES_NUM)
+        self.penalty = None
+
+    def encode(self, x):
+        return self.encoder(self.embedding(x))
+
+    def aggregate(self, x):
+        x, self.penalty = self.aggregation(x)
+        return x
+
+    def decode(self, x):
+        return [self.predict(x), self.penalty]
+
+
 class Net(nn.Module):
     """
     A model for sentiment analysis using lstm and self-attention
@@ -34,6 +59,19 @@
         x = self.predict(x)
         return x, penalty
 
+
+class MyTrainer(BaseTrainer):
+    def __init__(self, args):
+        super(MyTrainer, self).__init__(args)
+        self.optimizer = None
+
+    def define_optimizer(self):
+        self.optimizer = optim.SGD(self.model.parameters(), lr=0.01, momentum=0.9)
+
+    def define_loss(self):
+        self.loss_func = nn.CrossEntropyLoss()
+
+
 def train(model_dict=None, using_cuda=True, learning_rate=0.06,\
           momentum=0.3, batch_size=32, epochs=5, coef=1.0, interval=10):
     """
diff --git a/fastNLP/modules/utils.py b/fastNLP/modules/utils.py
index 15afe883..a6b31a20 100644
--- a/fastNLP/modules/utils.py
+++ b/fastNLP/modules/utils.py
@@ -7,3 +7,9 @@ def mask_softmax(matrix, mask):
     else:
         raise NotImplementedError
     return result
+
+
+def seq_mask(seq_len, max_len):
+    mask = [torch.ge(torch.LongTensor(seq_len), i + 1) for i in range(max_len)]
+    mask = torch.stack(mask, 1)
+    return mask
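What the new `seq_mask` helper returns for a toy input (worked out by hand from the code above):

    seq_mask([1, 3], max_len=3)
    # tensor([[1, 0, 0],
    #         [1, 1, 1]], dtype=torch.uint8)   # one row per sequence, 1 up to its true length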
diff --git a/fastNLP/reproduction/__init__.py b/fastNLP/reproduction/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/fastNLP/reproduction/CNN-sentence_classification/.gitignore b/reproduction/CNN-sentence_classification/.gitignore
similarity index 100%
rename from fastNLP/reproduction/CNN-sentence_classification/.gitignore
rename to reproduction/CNN-sentence_classification/.gitignore
diff --git a/fastNLP/reproduction/CNN-sentence_classification/README.md b/reproduction/CNN-sentence_classification/README.md
similarity index 100%
rename from fastNLP/reproduction/CNN-sentence_classification/README.md
rename to reproduction/CNN-sentence_classification/README.md
diff --git a/fastNLP/reproduction/CNN-sentence_classification/__init__.py b/reproduction/CNN-sentence_classification/__init__.py
similarity index 100%
rename from fastNLP/reproduction/CNN-sentence_classification/__init__.py
rename to reproduction/CNN-sentence_classification/__init__.py
diff --git a/fastNLP/reproduction/CNN-sentence_classification/dataset.py b/reproduction/CNN-sentence_classification/dataset.py
similarity index 100%
rename from fastNLP/reproduction/CNN-sentence_classification/dataset.py
rename to reproduction/CNN-sentence_classification/dataset.py
diff --git a/fastNLP/reproduction/CNN-sentence_classification/model.py b/reproduction/CNN-sentence_classification/model.py
similarity index 100%
rename from fastNLP/reproduction/CNN-sentence_classification/model.py
rename to reproduction/CNN-sentence_classification/model.py
diff --git a/fastNLP/reproduction/CNN-sentence_classification/rt-polaritydata/rt-polarity.neg b/reproduction/CNN-sentence_classification/rt-polaritydata/rt-polarity.neg
similarity index 100%
rename from fastNLP/reproduction/CNN-sentence_classification/rt-polaritydata/rt-polarity.neg
rename to reproduction/CNN-sentence_classification/rt-polaritydata/rt-polarity.neg
diff --git a/fastNLP/reproduction/CNN-sentence_classification/rt-polaritydata/rt-polarity.pos b/reproduction/CNN-sentence_classification/rt-polaritydata/rt-polarity.pos
similarity index 100%
rename from fastNLP/reproduction/CNN-sentence_classification/rt-polaritydata/rt-polarity.pos
rename to reproduction/CNN-sentence_classification/rt-polaritydata/rt-polarity.pos
diff --git a/fastNLP/reproduction/CNN-sentence_classification/train.py b/reproduction/CNN-sentence_classification/train.py
similarity index 100%
rename from fastNLP/reproduction/CNN-sentence_classification/train.py
rename to reproduction/CNN-sentence_classification/train.py
diff --git a/fastNLP/reproduction/Char-aware_NLM/LICENSE b/reproduction/Char-aware_NLM/LICENSE
similarity index 100%
rename from fastNLP/reproduction/Char-aware_NLM/LICENSE
rename to reproduction/Char-aware_NLM/LICENSE
diff --git a/fastNLP/reproduction/Char-aware_NLM/README.md b/reproduction/Char-aware_NLM/README.md
similarity index 100%
rename from fastNLP/reproduction/Char-aware_NLM/README.md
rename to reproduction/Char-aware_NLM/README.md
diff --git a/fastNLP/reproduction/Char-aware_NLM/__init__.py b/reproduction/Char-aware_NLM/__init__.py
similarity index 100%
rename from fastNLP/reproduction/Char-aware_NLM/__init__.py
rename to reproduction/Char-aware_NLM/__init__.py
diff --git a/fastNLP/reproduction/Char-aware_NLM/model.py b/reproduction/Char-aware_NLM/model.py
similarity index 100%
rename from fastNLP/reproduction/Char-aware_NLM/model.py
rename to reproduction/Char-aware_NLM/model.py
diff --git a/fastNLP/reproduction/Char-aware_NLM/test.py b/reproduction/Char-aware_NLM/test.py
similarity index 100%
rename from fastNLP/reproduction/Char-aware_NLM/test.py
rename to reproduction/Char-aware_NLM/test.py
diff --git a/fastNLP/reproduction/Char-aware_NLM/test.txt b/reproduction/Char-aware_NLM/test.txt
similarity index 100%
rename from fastNLP/reproduction/Char-aware_NLM/test.txt
rename to reproduction/Char-aware_NLM/test.txt
diff --git a/fastNLP/reproduction/Char-aware_NLM/train.py b/reproduction/Char-aware_NLM/train.py
similarity index 100%
rename from fastNLP/reproduction/Char-aware_NLM/train.py
rename to reproduction/Char-aware_NLM/train.py
diff --git a/fastNLP/reproduction/Char-aware_NLM/train.txt b/reproduction/Char-aware_NLM/train.txt
similarity index 100%
rename from fastNLP/reproduction/Char-aware_NLM/train.txt
rename to reproduction/Char-aware_NLM/train.txt
diff --git a/fastNLP/reproduction/Char-aware_NLM/utilities.py b/reproduction/Char-aware_NLM/utilities.py
similarity index 100%
rename from fastNLP/reproduction/Char-aware_NLM/utilities.py
rename to reproduction/Char-aware_NLM/utilities.py
diff --git a/fastNLP/reproduction/Char-aware_NLM/valid.txt b/reproduction/Char-aware_NLM/valid.txt
similarity index 100%
rename from fastNLP/reproduction/Char-aware_NLM/valid.txt
rename to reproduction/Char-aware_NLM/valid.txt
diff --git a/fastNLP/reproduction/HAN-document_classification/README.md b/reproduction/HAN-document_classification/README.md
similarity index 100%
rename from fastNLP/reproduction/HAN-document_classification/README.md
rename to reproduction/HAN-document_classification/README.md
diff --git a/fastNLP/reproduction/HAN-document_classification/__init__.py b/reproduction/HAN-document_classification/__init__.py
similarity index 100%
rename from fastNLP/reproduction/HAN-document_classification/__init__.py
rename to reproduction/HAN-document_classification/__init__.py
diff --git a/fastNLP/reproduction/HAN-document_classification/data/test_samples.pkl b/reproduction/HAN-document_classification/data/test_samples.pkl
similarity index 100%
rename from fastNLP/reproduction/HAN-document_classification/data/test_samples.pkl
rename to reproduction/HAN-document_classification/data/test_samples.pkl
diff --git a/fastNLP/reproduction/HAN-document_classification/data/train_samples.pkl b/reproduction/HAN-document_classification/data/train_samples.pkl
similarity index 100%
rename from fastNLP/reproduction/HAN-document_classification/data/train_samples.pkl
rename to reproduction/HAN-document_classification/data/train_samples.pkl
diff --git a/fastNLP/reproduction/HAN-document_classification/data/yelp.word2vec b/reproduction/HAN-document_classification/data/yelp.word2vec
similarity index 100%
rename from fastNLP/reproduction/HAN-document_classification/data/yelp.word2vec
rename to reproduction/HAN-document_classification/data/yelp.word2vec
diff --git a/fastNLP/reproduction/HAN-document_classification/evaluate.py b/reproduction/HAN-document_classification/evaluate.py
similarity index 100%
rename from fastNLP/reproduction/HAN-document_classification/evaluate.py
rename to reproduction/HAN-document_classification/evaluate.py
diff --git a/fastNLP/reproduction/HAN-document_classification/model.py b/reproduction/HAN-document_classification/model.py
similarity index 100%
rename from fastNLP/reproduction/HAN-document_classification/model.py
rename to reproduction/HAN-document_classification/model.py
diff --git a/fastNLP/reproduction/HAN-document_classification/preprocess.py b/reproduction/HAN-document_classification/preprocess.py
similarity index 100%
rename from fastNLP/reproduction/HAN-document_classification/preprocess.py
rename to reproduction/HAN-document_classification/preprocess.py
diff --git a/fastNLP/reproduction/HAN-document_classification/train.py b/reproduction/HAN-document_classification/train.py
similarity index 100%
rename from fastNLP/reproduction/HAN-document_classification/train.py
rename to reproduction/HAN-document_classification/train.py
diff --git a/requirements.txt b/requirements.txt
index 0fc94538..d961dd92 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
-numpy==1.14.2
+numpy>=1.14.2
 torch==0.4.0
-torchvision==0.1.8
+torchvision>=0.1.8
diff --git a/test/data_for_tests/people.txt b/test/data_for_tests/people.txt
new file mode 100644
index 00000000..f34c85cb
--- /dev/null
+++ b/test/data_for_tests/people.txt
@@ -0,0 +1,67 @@
+迈 B-v
+向 E-v
+充 B-v
+满 E-v
+希 B-n
+望 E-n
+的 S-u
+新 S-a
+世 B-n
+纪 E-n
+— B-w
+— E-w
+一 B-t
+九 M-t
+九 M-t
+八 M-t
+年 E-t
+新 B-t
+年 E-t
+讲 B-n
+话 E-n
+( S-w
+附 S-v
+图 B-n
+片 E-n
+1 S-m
+张 S-q
+) S-w
+
+中 B-nt
+共 M-nt
+中 M-nt
+央 E-nt
+总 B-n
+书 M-n
+记 E-n
+、 S-w
+国 B-n
+家 E-n
+主 B-n
+席 E-n
+江 B-nr
+泽 M-nr
+民 E-nr
+
+( S-w
+一 B-t
+九 M-t
+九 M-t
+七 M-t
+年 E-t
+十 B-t
+二 M-t
+月 E-t
+三 B-t
+十 M-t
+一 M-t
+日 E-t
+) S-w
+
+1 B-t
+2 M-t
+月 E-t
+3 B-t
+1 M-t
+日 E-t
+, S-w
\ No newline at end of file
diff --git a/test/test_POS_pipeline.py b/test/test_POS_pipeline.py
new file mode 100644
index 00000000..af22e3b9
--- /dev/null
+++ b/test/test_POS_pipeline.py
@@ -0,0 +1,35 @@
+import sys
+
+sys.path.append("..")
+
+from fastNLP.action.trainer import POSTrainer
+from fastNLP.loader.dataset_loader import POSDatasetLoader
+from fastNLP.loader.preprocess import POSPreprocess
+from fastNLP.models.sequence_modeling import SeqLabeling
+
+data_name = "people.txt"
+data_path = "data_for_tests/people.txt"
+pickle_path = "data_for_tests"
+
+if __name__ == "__main__":
+    # Data Loader
+    pos = POSDatasetLoader(data_name, data_path)
+    train_data = pos.load_lines()
+
+    # Preprocessor
+    p = POSPreprocess(train_data, pickle_path)
+    vocab_size = p.vocab_size
+    num_classes = p.num_classes
+
+    # Trainer
+    train_args = {"epochs": 20, "batch_size": 1, "num_classes": num_classes,
+                  "vocab_size": vocab_size, "pickle_path": pickle_path, "validate": True}
+    trainer = POSTrainer(train_args)
+
+    # Model
+    model = SeqLabeling(100, 1, num_classes, vocab_size, bi_direction=True)
+
+    # Start training
+    trainer.train(model)
+
+    print("Training finished!")
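To exercise the whole pipeline after applying the patch, run `python test_POS_pipeline.py` from inside the `test/` directory: the script's `data_path` and `pickle_path` are relative to that directory.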