diff --git a/fastNLP/core/action.py b/fastNLP/core/action.py
index 17680216..560fa42e 100644
--- a/fastNLP/core/action.py
+++ b/fastNLP/core/action.py
@@ -1,16 +1,111 @@
+"""
+    This file defines Action(s) and sampling methods.
+
+"""
 from collections import Counter

 import numpy as np
+import torch


 class Action(object):
     """
-        base class for Trainer and Tester
+        Operations shared by Trainer, Tester, and Inference.
+        This is designed to reduce duplicated code.
+            - make_batch: produce a mini-batch of data. @staticmethod
+            - pad: padding method used in sequence modeling. @staticmethod
+            - mode: change network mode for either train or test. (for PyTorch) @staticmethod
+        The base Action shall define operations shared by as many task-specific Actions as possible.
     """

     def __init__(self):
         super(Action, self).__init__()

+    @staticmethod
+    def make_batch(iterator, data, use_cuda, output_length=True, max_len=None):
+        """Batch and pad data.
+        :param iterator: an iterator (an object that implements the __next__ method) which returns the next batch of sample indices.
+        :param data: list. Each entry is a sample, which is also a list of features and label(s).
+            E.g.
+                [
+                    [[word_11, word_12, word_13], [label_11, label_12]],  # sample 1
+                    [[word_21, word_22, word_23], [label_21, label_22]],  # sample 2
+                    ...
+                ]
+        :param use_cuda: bool
+        :param output_length: whether to output the original length of each sequence before padding.
+        :param max_len: int, maximum sequence length.
+        :return (batch_x, seq_len), batch_y: if output_length is True.
+                    batch_x: Tensor. Each entry is the features of a sample. [batch_size, max_len]
+                    seq_len: list. The length of each pre-padded sequence.
+                    batch_y: Tensor. Each entry is the label(s) of a sample. [batch_size, num_labels]
+                batch_x, batch_y: if output_length is False.
+        """
+        for indices in iterator:
+            batch = [data[idx] for idx in indices]
+            batch_x = [sample[0] for sample in batch]
+            batch_y = [sample[1] for sample in batch]
+            # record the original sequence lengths before padding
+            seq_len = [len(x) for x in batch_x]
+
+            batch_x = Action.pad(batch_x)
+            # pad batch_y only if it is a 2-level list
+            if len(batch_y) > 0 and isinstance(batch_y[0], list):
+                batch_y = Action.pad(batch_y)
+
+            # convert list to tensor
+            batch_x = convert_to_torch_tensor(batch_x, use_cuda)
+            batch_y = convert_to_torch_tensor(batch_y, use_cuda)
+
+            # trim data to max_len
+            if max_len is not None and batch_x.size(1) > max_len:
+                batch_x = batch_x[:, :max_len]
+
+            if output_length:
+                yield (batch_x, seq_len), batch_y
+            else:
+                yield batch_x, batch_y
+
+    @staticmethod
+    def pad(batch, fill=0):
+        """
+        Pad a batch of samples to the maximum length of this batch.
+        :param batch: list of list
+        :param fill: word index to pad with, default 0.
+        :return: a padded batch
+        """
+        max_length = max([len(x) for x in batch])
+        for idx, sample in enumerate(batch):
+            if len(sample) < max_length:
+                batch[idx] = sample + ([fill] * (max_length - len(sample)))
+        return batch
+
+    @staticmethod
+    def mode(model, test=False):
+        """
+        Train mode or Test mode. This is for PyTorch currently.
+        :param model: a PyTorch model
+        :param test: bool, True to put the model in evaluation mode, False for training mode.
+        """
+        if test:
+            model.eval()
+        else:
+            model.train()
+
+
+def convert_to_torch_tensor(data_list, use_cuda):
+    """
+    Convert lists into (cuda) Tensors.
+    :param data_list: 2-level lists
+    :param use_cuda: bool
+    :return: PyTorch Tensor of shape [batch_size, max_seq_len]
+    """
+    data_list = torch.Tensor(data_list).long()
+    if torch.cuda.is_available() and use_cuda:
+        data_list = data_list.cuda()
+    return data_list
+

 def k_means_1d(x, k, max_iter=100):
     """
@@ -140,11 +235,10 @@ class Batchifier(object):

     def __iter__(self):
         batch = []
-        while True:
-            for idx in self.sampler:
-                batch.append(idx)
-                if len(batch) == self.batch_size:
-                    yield batch
-                    batch = []
-            if 0 < len(batch) < self.batch_size and self.drop_last is False:
+        for idx in self.sampler:
+            batch.append(idx)
+            if len(batch) == self.batch_size:
                 yield batch
+                batch = []
+        if 0 < len(batch) < self.batch_size and self.drop_last is False:
+            yield batch
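For reference, a minimal sketch of how the new Action.make_batch pairs with Batchifier and RandomSampler (toy word/tag indices; assumes the [features, label(s)] sample layout documented in the docstring above):

    from fastNLP.core.action import Action, Batchifier, RandomSampler

    data = [
        [[1, 2, 3], [1, 0, 1]],        # sample 1: word indices, tag indices
        [[4, 5, 6, 7], [0, 1, 0, 1]],  # sample 2 is longer; sample 1 gets padded to match
    ]
    iterator = iter(Batchifier(RandomSampler(data), batch_size=2, drop_last=False))
    for (batch_x, seq_len), batch_y in Action.make_batch(iterator, data, use_cuda=False):
        print(batch_x.size(), seq_len)  # e.g. torch.Size([2, 4]) and the pre-padding lengths
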
diff --git a/fastNLP/core/inference.py b/fastNLP/core/inference.py
index 7545a826..11a3ba48 100644
--- a/fastNLP/core/inference.py
+++ b/fastNLP/core/inference.py
@@ -1,7 +1,45 @@
+import numpy as np
 import torch

 from fastNLP.core.action import Batchifier, SequentialSampler
+from fastNLP.core.action import convert_to_torch_tensor
 from fastNLP.loader.preprocess import load_pickle, DEFAULT_UNKNOWN_LABEL
+from fastNLP.modules import utils
+
+
+def make_batch(iterator, data, use_cuda, output_length=False, max_len=None, min_len=None):
+    """Batch and pad unlabeled data for inference.
+    :param iterator: an iterator over batches of sample indices.
+    :param data: list of word-index sequences.
+    :param use_cuda: bool
+    :param output_length: whether to also yield the pre-padding sequence lengths.
+    :param max_len: int, sequences longer than this are trimmed.
+    :param min_len: int, batches narrower than this are zero-padded up to it.
+    """
+    for indices in iterator:
+        batch_x = [data[idx] for idx in indices]
+        # record the original sequence lengths before padding
+        seq_len = [len(x) for x in batch_x]
+        batch_x = pad(batch_x)
+        # convert list to tensor
+        batch_x = convert_to_torch_tensor(batch_x, use_cuda)
+
+        # trim data to max_len
+        if max_len is not None and batch_x.size(1) > max_len:
+            batch_x = batch_x[:, :max_len]
+        if min_len is not None and batch_x.size(1) < min_len:
+            pad_tensor = torch.zeros(batch_x.size(0), min_len - batch_x.size(1)).to(batch_x)
+            batch_x = torch.cat((batch_x, pad_tensor), 1)
+
+        if output_length:
+            yield tuple([batch_x, seq_len])
+        else:
+            yield batch_x
+
+
+def pad(batch, fill=0):
+    """
+    Pad a batch of samples to the maximum length of this batch.
+    :param batch: list of list
+    :param fill: word index to pad with, default 0.
+    :return: a padded batch
+    """
+    max_length = max([len(x) for x in batch])
+    for idx, sample in enumerate(batch):
+        if len(sample) < max_length:
+            batch[idx] = sample + ([fill] * (max_length - len(sample)))
+    return batch


 class Inference(object):
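As a quick illustration of the new module-level helper, make_batch can widen an entire batch up to min_len after the usual within-batch padding (a toy sketch; this is the path ClassificationInfer uses below with min_len=5):

    from fastNLP.core.action import Batchifier, SequentialSampler
    from fastNLP.core.inference import make_batch

    data = [[3, 1, 4], [1, 5]]  # two word-index sequences, no labels
    iterator = iter(Batchifier(SequentialSampler(data), batch_size=2, drop_last=False))
    for batch_x in make_batch(iterator, data, use_cuda=False, min_len=5):
        print(batch_x.size())  # torch.Size([2, 5]): padded to width 3, then zero-filled up to min_len
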
""" def __init__(self, pickle_path): @@ -32,13 +71,11 @@ class Inference(object): # turn on the testing mode; clean up the history self.mode(network, test=True) + self.batch_output.clear() - self.iterator = iter(Batchifier(SequentialSampler(data), self.batch_size, drop_last=False)) - - num_iter = len(data) // self.batch_size + iterator = iter(Batchifier(SequentialSampler(data), self.batch_size, drop_last=False)) - for step in range(num_iter): - batch_x = self.make_batch(data) + for batch_x in self.make_batch(iterator, data, use_cuda=False): prediction = self.data_forward(network, batch_x) @@ -51,43 +88,12 @@ class Inference(object): network.eval() else: network.train() - self.batch_output.clear() def data_forward(self, network, x): - """ - This is only for sequence labeling with CRF decoder. TODO: more general ? - :param network: - :param x: - :return: - """ - seq_len = [len(seq) for seq in x] - x = torch.Tensor(x).long() - y = network(x) - prediction = network.prediction(y, seq_len) - # To do: hide framework - results = torch.Tensor(prediction).view(-1, ) - return list(results.data) + raise NotImplementedError - def make_batch(self, data): - indices = next(self.iterator) - batch_x = [data[idx] for idx in indices] - if self.batch_size > 1: - batch_x = self.pad(batch_x) - return batch_x - - @staticmethod - def pad(batch, fill=0): - """ - Pad a batch of samples to maximum length. - :param batch: list of list - :param fill: word index to pad, default 0. - :return: a padded batch - """ - max_length = max([len(x) for x in batch]) - for idx, sample in enumerate(batch): - if len(sample) < max_length: - batch[idx] = sample + [fill * (max_length - len(sample))] - return batch + def make_batch(self, iterator, data, use_cuda): + raise NotImplementedError def prepare_input(self, data): """ @@ -106,13 +112,76 @@ class Inference(object): data_index.append([self.word2index.get(w, default_unknown_index) for w in example]) return data_index + def prepare_output(self, data): + raise NotImplementedError + + +class SeqLabelInfer(Inference): + """ + Inference on sequence labeling models. + """ + + def __init__(self, pickle_path): + super(SeqLabelInfer, self).__init__(pickle_path) + + def data_forward(self, network, inputs): + """ + This is only for sequence labeling with CRF decoder. + :param network: + :param inputs: + :return: Tensor + """ + if not isinstance(inputs[1], list) and isinstance(inputs[0], list): + raise RuntimeError("[fastnlp] output_length must be true for sequence modeling.") + # unpack the returned value from make_batch + x, seq_len = inputs[0], inputs[1] + batch_size, max_len = x.size(0), x.size(1) + mask = utils.seq_mask(seq_len, max_len) + mask = mask.byte().view(batch_size, max_len) + y = network(x) + prediction = network.prediction(y, mask) + return torch.Tensor(prediction, required_grad=False) + + def make_batch(self, iterator, data, use_cuda): + return make_batch(iterator, data, use_cuda, output_length=True) + def prepare_output(self, batch_outputs): """ Transform list of batch outputs into strings. - :param batch_outputs: list of list, of shape [num_batch, tag_seq_length]. Element type is Tensor. + :param batch_outputs: list of 2-D Tensor, of shape [num_batch, batch-size, tag_seq_length]. 
+
+
+class ClassificationInfer(Inference):
+    """
+    Inference on Classification models.
+    """
+
+    def __init__(self, pickle_path):
+        super(ClassificationInfer, self).__init__(pickle_path)
+
+    def data_forward(self, network, x):
+        """Forward through network."""
+        logits = network(x)
+        return logits
+
+    def make_batch(self, iterator, data, use_cuda):
+        return make_batch(iterator, data, use_cuda, output_length=False, min_len=5)
+
+    def prepare_output(self, batch_outputs):
+        """
+        Transform list of batch outputs into strings.
+        :param batch_outputs: list of 2-D Tensors, one per batch, each of shape [batch_size, num_classes].
+        :return:
+        """
+        results = []
+        for batch_out in batch_outputs:
+            # argmax over the class dimension, one predicted label per example
+            for idx in np.argmax(batch_out.detach().numpy(), axis=-1):
+                results.append(self.index2label[int(idx)])
+        return results
diff --git a/fastNLP/core/tester.py b/fastNLP/core/tester.py
index e45f1017..425f2029 100644
--- a/fastNLP/core/tester.py
+++ b/fastNLP/core/tester.py
@@ -1,14 +1,14 @@
 import _pickle
-import os

 import numpy as np
 import torch

 from fastNLP.core.action import Action
 from fastNLP.core.action import RandomSampler, Batchifier
+from fastNLP.modules import utils


-class BaseTester(Action):
+class BaseTester(object):
     """docstring for Tester"""

     def __init__(self, test_args):
@@ -37,25 +37,33 @@ class BaseTester(Action):
         else:
             self.model = network

+        # no backward setting for model
+        for param in network.parameters():
+            param.requires_grad = False
+
         # turn on the testing mode; clean up the history
         self.mode(network, test=True)
+        self.eval_history.clear()
+        self.batch_output.clear()

         dev_data = self.prepare_input(self.pickle_path)

-        self.iterator = iter(Batchifier(RandomSampler(dev_data), self.batch_size, drop_last=True))
-
-        num_iter = len(dev_data) // self.batch_size
+        iterator = iter(Batchifier(RandomSampler(dev_data), self.batch_size, drop_last=True))
+        n_batches = len(dev_data) // self.batch_size
+        n_print = 1
+        step = 0

-        for step in range(num_iter):
-            batch_x, batch_y = self.make_batch(dev_data)
+        for batch_x, batch_y in self.make_batch(iterator, dev_data):

             prediction = self.data_forward(network, batch_x)
+            eval_results = self.evaluate(prediction, batch_y)

             if self.save_output:
                 self.batch_output.append(prediction)
             if self.save_loss:
                 self.eval_history.append(eval_results)
+            step += 1

     def prepare_input(self, data_path):
         """
@@ -64,51 +72,14 @@
         :return save_dev_data: list. Each entry is a sample, which is also a list of features and label(s).
         """
         if self.save_dev_data is None:
-            data_dev = _pickle.load(open(data_path + "/data_dev.pkl", "rb"))
+            data_dev = _pickle.load(open(data_path + "data_dev.pkl", "rb"))
             self.save_dev_data = data_dev
         return self.save_dev_data

-    def make_batch(self, data, output_length=True):
-        """
-        1. Perform batching from data and produce a batch of training data.
-        2. Add padding.
-        :param data: list. Each entry is a sample, which is also a list of features and label(s).
-            E.g.
-                [
-                    [[word_11, word_12, word_13], [label_11. label_12]],  # sample 1
-                    [[word_21, word_22, word_23], [label_21. label_22]],  # sample 2
-                    ...
-                ]
-        :return batch_x: list. Each entry is a list of features of a sample. [batch_size, max_len]
-                batch_y: list. Each entry is a list of labels of a sample.
[batch_size, num_labels] - """ - indices = next(self.iterator) - batch = [data[idx] for idx in indices] - batch_x = [sample[0] for sample in batch] - batch_y = [sample[1] for sample in batch] - batch_x_pad = self.pad(batch_x) - batch_y_pad = self.pad(batch_y) - if output_length: - seq_len = [len(x) for x in batch_x] - return (batch_x_pad, seq_len), batch_y_pad - else: - return batch_x_pad, batch_y_pad - - @staticmethod - def pad(batch, fill=0): - """ - Pad a batch of samples to maximum length. - :param batch: list of list - :param fill: word index to pad, default 0. - :return: a padded batch - """ - max_length = max([len(x) for x in batch]) - for idx, sample in enumerate(batch): - if len(sample) < max_length: - batch[idx] = sample + ([fill] * (max_length - len(sample))) - return batch + def mode(self, model, test): + Action.mode(model, test) - def data_forward(self, network, data): + def data_forward(self, network, x): raise NotImplementedError def evaluate(self, predict, truth): @@ -118,14 +89,6 @@ class BaseTester(Action): def metrics(self): raise NotImplementedError - def mode(self, model, test=True): - """TODO: combine this function with Trainer ?? """ - if test: - model.eval() - else: - model.train() - self.eval_history.clear() - def show_matrices(self): """ This is called by Trainer to print evaluation on dev set. @@ -133,8 +96,11 @@ class BaseTester(Action): """ raise NotImplementedError + def make_batch(self, iterator, data): + raise NotImplementedError + -class POSTester(BaseTester): +class SeqLabelTester(BaseTester): """ Tester for sequence labeling. """ @@ -143,44 +109,36 @@ class POSTester(BaseTester): """ :param test_args: a dict-like object that has __getitem__ method, can be accessed by "test_args["key_str"]" """ - super(POSTester, self).__init__(test_args) + super(SeqLabelTester, self).__init__(test_args) self.max_len = None self.mask = None self.batch_result = None def data_forward(self, network, inputs): - """TODO: combine with Trainer - - :param network: the PyTorch model - :param x: list of list, [batch_size, max_len] - :return y: [batch_size, num_classes] - """ + if not isinstance(inputs, tuple): + raise RuntimeError("[fastnlp] output_length must be true for sequence modeling.") # unpack the returned value from make_batch - if isinstance(inputs, tuple): - x = inputs[0] - self.seq_len = inputs[1] - else: - x = inputs - x = torch.Tensor(x).long() + x, seq_len = inputs[0], inputs[1] + batch_size, max_len = x.size(0), x.size(1) + mask = utils.seq_mask(seq_len, max_len) + mask = mask.byte().view(batch_size, max_len) if torch.cuda.is_available() and self.use_cuda: - x = x.cuda() - self.batch_size = x.size(0) - self.max_len = x.size(1) + mask = mask.cuda() + self.mask = mask y = network(x) return y def evaluate(self, predict, truth): - truth = torch.Tensor(truth) - if torch.cuda.is_available() and self.use_cuda: - truth = truth.cuda() - loss = self.model.loss(predict, truth, self.seq_len) / self.batch_size - prediction = self.model.prediction(predict, self.seq_len) + batch_size, max_len = predict.size(0), predict.size(1) + loss = self.model.loss(predict, truth, self.mask) / batch_size + + prediction = self.model.prediction(predict, self.mask) results = torch.Tensor(prediction).view(-1,) - if torch.cuda.is_available() and self.use_cuda: - results = results.cuda() - accuracy = float(torch.sum(results == truth.view((-1,)))) / results.shape[0] - return [loss.data, accuracy] + # make sure "results" is in the same device as "truth" + results = results.to(truth) + accuracy = 
torch.sum(results == truth.view((-1,))) / results.shape[0] + return [loss.data, accuracy.data] def metrics(self): batch_loss = np.mean([x[0] for x in self.eval_history]) @@ -195,8 +153,11 @@ class POSTester(BaseTester): loss, accuracy = self.metrics() return "dev loss={:.2f}, accuracy={:.2f}".format(loss, accuracy) + def make_batch(self, iterator, data): + return Action.make_batch(iterator, data, use_cuda=self.use_cuda, output_length=True) + -class ClassTester(BaseTester): +class ClassificationTester(BaseTester): """Tester for classification.""" def __init__(self, test_args): @@ -204,7 +165,7 @@ class ClassTester(BaseTester): :param test_args: a dict-like object that has __getitem__ method, \ can be accessed by "test_args["key_str"]" """ - # super(ClassTester, self).__init__() + super(ClassificationTester, self).__init__(test_args) self.pickle_path = test_args["pickle_path"] self.save_dev_data = None @@ -212,111 +173,8 @@ class ClassTester(BaseTester): self.mean_loss = None self.iterator = None - if "test_name" in test_args: - self.test_name = test_args["test_name"] - else: - self.test_name = "data_test.pkl" - - if "validate_in_training" in test_args: - self.validate_in_training = test_args["validate_in_training"] - else: - self.validate_in_training = False - - if "save_output" in test_args: - self.save_output = test_args["save_output"] - else: - self.save_output = False - - if "save_loss" in test_args: - self.save_loss = test_args["save_loss"] - else: - self.save_loss = True - - if "batch_size" in test_args: - self.batch_size = test_args["batch_size"] - else: - self.batch_size = 50 - if "use_cuda" in test_args: - self.use_cuda = test_args["use_cuda"] - else: - self.use_cuda = True - - if "max_len" in test_args: - self.max_len = test_args["max_len"] - else: - self.max_len = None - - self.model = None - self.eval_history = [] - self.batch_output = [] - - def test(self, network): - # prepare model - if torch.cuda.is_available() and self.use_cuda: - self.model = network.cuda() - else: - self.model = network - - # no backward setting for model - for param in self.model.parameters(): - param.requires_grad = False - - # turn on the testing mode; clean up the history - self.mode(network, test=True) - - # prepare test data - data_test = self.prepare_input(self.pickle_path, self.test_name) - - # data generator - self.iterator = iter(Batchifier( - RandomSampler(data_test), self.batch_size, drop_last=False)) - - # test - n_batches = len(data_test) // self.batch_size - n_print = n_batches // 10 - step = 0 - for batch_x, batch_y in self.make_batch(data_test, max_len=self.max_len): - prediction = self.data_forward(network, batch_x) - eval_results = self.evaluate(prediction, batch_y) - - if self.save_output: - self.batch_output.append(prediction) - if self.save_loss: - self.eval_history.append(eval_results) - - if step % n_print == 0: - print("step: {:>5}".format(step)) - - step += 1 - - def prepare_input(self, data_dir, file_name): - """Prepare data.""" - file_path = os.path.join(data_dir, file_name) - with open(file_path, 'rb') as f: - data = _pickle.load(f) - return data - - def make_batch(self, data, max_len=None): - """Batch and pad data.""" - for indices in self.iterator: - # generate batch and pad - batch = [data[idx] for idx in indices] - batch_x = [sample[0] for sample in batch] - batch_y = [sample[1] for sample in batch] - batch_x = self.pad(batch_x) - - # convert to tensor - batch_x = torch.tensor(batch_x, dtype=torch.long) - batch_y = torch.tensor(batch_y, dtype=torch.long) - if 
torch.cuda.is_available() and self.use_cuda: - batch_x = batch_x.cuda() - batch_y = batch_y.cuda() - - # trim data to max_len - if max_len is not None and batch_x.size(1) > max_len: - batch_x = batch_x[:, :max_len] - - yield batch_x, batch_y + def make_batch(self, iterator, data, max_len=None): + return Action.make_batch(iterator, data, use_cuda=self.use_cuda, max_len=max_len) def data_forward(self, network, x): """Forward through network.""" @@ -337,10 +195,3 @@ class ClassTester(BaseTester): acc = float(torch.sum(y_pred == y_true)) / len(y_true) return y_true.cpu().numpy(), y_prob.cpu().numpy(), acc - def mode(self, model, test=True): - """TODO: combine this function with Trainer ?? """ - if test: - model.eval() - else: - model.train() - self.eval_history.clear() diff --git a/fastNLP/core/trainer.py b/fastNLP/core/trainer.py index 3da6b061..d7515a40 100644 --- a/fastNLP/core/trainer.py +++ b/fastNLP/core/trainer.py @@ -8,20 +8,18 @@ import torch import torch.nn as nn from fastNLP.core.action import Action -from fastNLP.core.action import RandomSampler, Batchifier, BucketSampler -from fastNLP.core.tester import POSTester +from fastNLP.core.action import RandomSampler, Batchifier +from fastNLP.core.tester import SeqLabelTester, ClassificationTester +from fastNLP.modules import utils from fastNLP.saver.model_saver import ModelSaver -class BaseTrainer(Action): +class BaseTrainer(object): """Base trainer for all trainers. Trainer receives a model and data, and then performs training. Subclasses must implement the following abstract methods: - - prepare_input - - mode - define_optimizer - - data_forward - grad_backward - get_loss """ @@ -75,25 +73,29 @@ class BaseTrainer(Action): data_train, data_dev, data_test, embedding = self.prepare_input(self.pickle_path) # define tester over dev data - # TODO: more flexible - valid_args = {"save_output": True, "validate_in_training": True, "save_dev_input": True, - "save_loss": True, "batch_size": self.batch_size, "pickle_path": self.pickle_path, - "use_cuda": self.use_cuda} - validator = POSTester(valid_args) + if self.validate: + default_valid_args = {"save_output": True, "validate_in_training": True, "save_dev_input": True, + "save_loss": True, "batch_size": self.batch_size, "pickle_path": self.pickle_path, + "use_cuda": self.use_cuda} + validator = self._create_validator(default_valid_args) - # main training epochs - iterations = len(data_train) // self.batch_size self.define_optimizer() + # main training epochs + start = time() + n_samples = len(data_train) + n_batches = n_samples // self.batch_size + n_print = 1 + for epoch in range(1, self.n_epochs + 1): - # turn on network training mode; define optimizer; prepare batch iterator - self.mode(test=False) - self.iterator = iter(Batchifier(BucketSampler(data_train), self.batch_size, drop_last=True)) + # turn on network training mode; prepare batch iterator + self.mode(network, test=False) + iterator = iter(Batchifier(RandomSampler(data_train), self.batch_size, drop_last=False)) # training iterations in one epoch - for step in range(iterations): - batch_x, batch_y = self.make_batch(data_train) + step = 0 + for batch_x, batch_y in self.make_batch(iterator, data_train): prediction = self.data_forward(network, batch_x) @@ -101,12 +103,14 @@ class BaseTrainer(Action): self.grad_backward(loss) self.update() - if step % 10 == 0: - print("[epoch {} step {}] train loss={:.2f}".format(epoch, step, loss.data)) + if step % n_print == 0: + end = time() + diff = timedelta(seconds=round(end - start)) + 
print("[epoch: {:>3} step: {:>4}] train loss: {:>4.2} time: {}".format( + epoch, step, loss.data, diff)) + step += 1 if self.validate: - if data_dev is None: - raise RuntimeError("No validation data provided.") validator.test(network) if self.save_best_dev and self.best_eval_result(validator): @@ -116,22 +120,32 @@ class BaseTrainer(Action): print("[epoch {}]".format(epoch), end=" ") print(validator.show_matrices()) - # finish training - - def prepare_input(self, data_path): - data_train = _pickle.load(open(data_path + "data_train.pkl", "rb")) - data_dev = _pickle.load(open(data_path + "data_dev.pkl", "rb")) - data_test = _pickle.load(open(data_path + "data_test.pkl", "rb")) - embedding = _pickle.load(open(data_path + "embedding.pkl", "rb")) - return data_train, data_dev, data_test, embedding - - def mode(self, test=False): + def prepare_input(self, pickle_path): """ - Tell the network to be trained or not. - :param test: bool + For task-specific processing. + :param pickle_path: + :return data_train, data_dev, data_test, embedding: """ + names = [ + "data_train.pkl", "data_dev.pkl", + "data_test.pkl", "embedding.pkl"] + files = [] + for name in names: + file_path = os.path.join(pickle_path, name) + if os.path.exists(file_path): + with open(file_path, 'rb') as f: + data = _pickle.load(f) + else: + data = [] + files.append(data) + return tuple(files) + + def make_batch(self, iterator, data): raise NotImplementedError + def mode(self, network, test): + Action.mode(network, test) + def define_optimizer(self): """ Define framework-specific optimizer specified by the models. @@ -147,14 +161,6 @@ class BaseTrainer(Action): raise NotImplementedError def data_forward(self, network, x): - """ - Forward pass of the data. - :param network: a model - :param x: input feature matrix and label vector - :return: output by the models - - For PyTorch, just do "network(*x)" - """ raise NotImplementedError def grad_backward(self, loss): @@ -187,50 +193,6 @@ class BaseTrainer(Action): """ raise NotImplementedError - def make_batch(self, data, output_length=True): - """ - 1. Perform batching from data and produce a batch of training data. - 2. Add padding. - :param data: list. Each entry is a sample, which is also a list of features and label(s). - E.g. - [ - [[word_11, word_12, word_13], [label_11. label_12]], # sample 1 - [[word_21, word_22, word_23], [label_21. label_22]], # sample 2 - ... - ] - :return (batch_x, seq_len): tuple of two elements, if output_length is true. - batch_x: list. Each entry is a list of features of a sample. [batch_size, max_len] - seq_len: list. The length of the pre-padded sequence, if output_length is True. - batch_y: list. Each entry is a list of labels of a sample. [batch_size, num_labels] - - return batch_x and batch_y, if output_length is False - """ - indices = next(self.iterator) - batch = [data[idx] for idx in indices] - batch_x = [sample[0] for sample in batch] - batch_y = [sample[1] for sample in batch] - batch_x_pad = self.pad(batch_x) - batch_y_pad = self.pad(batch_y) - if output_length: - seq_len = [len(x) for x in batch_x] - return (batch_x_pad, seq_len), batch_y_pad - else: - return batch_x_pad, batch_y_pad - - @staticmethod - def pad(batch, fill=0): - """ - Pad a batch of samples to maximum length. - :param batch: list of list - :param fill: word index to pad, default 0. 
- :return: a padded batch - """ - max_length = max([len(x) for x in batch]) - for idx, sample in enumerate(batch): - if len(sample) < max_length: - batch[idx] = sample + ([fill] * (max_length - len(sample))) - return batch - def best_eval_result(self, validator): """ :param validator: a Tester instance @@ -245,6 +207,9 @@ class BaseTrainer(Action): """ ModelSaver(self.model_saved_path + "model_best_dev.pkl").save_pytorch(network) + def _create_validator(self, valid_args): + raise NotImplementedError + class ToyTrainer(BaseTrainer): """ @@ -259,12 +224,6 @@ class ToyTrainer(BaseTrainer): data_dev = _pickle.load(open(data_path + "/data_train.pkl", "rb")) return data_train, data_dev, 0, 1 - def mode(self, test=False): - if test: - self.model.eval() - else: - self.model.train() - def data_forward(self, network, x): return network(x) @@ -282,53 +241,20 @@ class ToyTrainer(BaseTrainer): self.optimizer.step() -class POSTrainer(BaseTrainer): +class SeqLabelTrainer(BaseTrainer): """ Trainer for Sequence Modeling """ + def __init__(self, train_args): - super(POSTrainer, self).__init__(train_args) + super(SeqLabelTrainer, self).__init__(train_args) self.vocab_size = train_args["vocab_size"] self.num_classes = train_args["num_classes"] self.max_len = None self.mask = None self.best_accuracy = 0.0 - def prepare_input(self, data_path): - - data_train = _pickle.load(open(data_path + "/data_train.pkl", "rb")) - data_dev = _pickle.load(open(data_path + "/data_train.pkl", "rb")) - return data_train, data_dev, 0, 1 - - def data_forward(self, network, inputs): - """ - :param network: the PyTorch model - :param inputs: list of list, [batch_size, max_len], - or tuple of (batch_x, seq_len), batch_x == [batch_size, max_len] - :return y: [batch_size, max_len, tag_size] - """ - # unpack the returned value from make_batch - if isinstance(inputs, tuple): - x = inputs[0] - self.seq_len = inputs[1] - else: - x = inputs - x = torch.Tensor(x).long() - if torch.cuda.is_available() and self.use_cuda: - x = x.cuda() - self.batch_size = x.size(0) - self.max_len = x.size(1) - - y = network(x) - return y - - def mode(self, test=False): - if test: - self.model.eval() - else: - self.model.train() - def define_optimizer(self): self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01, momentum=0.9) @@ -339,6 +265,23 @@ class POSTrainer(BaseTrainer): def update(self): self.optimizer.step() + def data_forward(self, network, inputs): + if not isinstance(inputs, tuple): + raise RuntimeError("[fastnlp] output_length must be true for sequence modeling.") + # unpack the returned value from make_batch + x, seq_len = inputs[0], inputs[1] + + batch_size, max_len = x.size(0), x.size(1) + mask = utils.seq_mask(seq_len, max_len) + mask = mask.byte().view(batch_size, max_len) + + if torch.cuda.is_available() and self.use_cuda: + mask = mask.cuda() + self.mask = mask + + y = network(x) + return y + def get_loss(self, predict, truth): """ Compute loss given prediction and ground truth. 
@@ -346,17 +289,10 @@ class POSTrainer(BaseTrainer): :param truth: ground truth label vector, [batch_size, max_len] :return: a scalar """ - truth = torch.Tensor(truth) - if torch.cuda.is_available() and self.use_cuda: - truth = truth.cuda() - assert truth.shape == (self.batch_size, self.max_len) - if self.loss_func is None: - if hasattr(self.model, "loss"): - self.loss_func = self.model.loss - else: - self.define_loss() - loss = self.loss_func(predict, truth, self.seq_len) - # print("loss={:.2f}".format(loss.data)) + batch_size, max_len = predict.size(0), predict.size(1) + assert truth.shape == (batch_size, max_len) + + loss = self.model.loss(predict, truth, self.mask) return loss def best_eval_result(self, validator): @@ -367,62 +303,18 @@ class POSTrainer(BaseTrainer): else: return False - def make_batch(self, data, output_length=True): - """ - 1. Perform batching from data and produce a batch of training data. - 2. Add padding. - :param data: list. Each entry is a sample, which is also a list of features and label(s). - E.g. - [ - [[word_11, word_12, word_13], [label_11. label_12]], # sample 1 - [[word_21, word_22, word_23], [label_21. label_22]], # sample 2 - ... - ] - :return (batch_x, seq_len): tuple of two elements, if output_length is true. - batch_x: list. Each entry is a list of features of a sample. [batch_size, max_len] - seq_len: list. The length of the pre-padded sequence, if output_length is True. - batch_y: list. Each entry is a list of labels of a sample. [batch_size, num_labels] - - return batch_x and batch_y, if output_length is False - """ - indices = next(self.iterator) - batch = [data[idx] for idx in indices] - batch_x = [sample[0] for sample in batch] - batch_y = [sample[1] for sample in batch] - batch_x_pad = self.pad(batch_x) - batch_y_pad = self.pad(batch_y) - if output_length: - seq_len = [len(x) for x in batch_x] - return (batch_x_pad, seq_len), batch_y_pad - else: - return batch_x_pad, batch_y_pad - + def make_batch(self, iterator, data): + return Action.make_batch(iterator, data, output_length=True, use_cuda=self.use_cuda) -class LanguageModelTrainer(BaseTrainer): - """ - Trainer for Language Model - """ + def _create_validator(self, valid_args): + return SeqLabelTester(valid_args) - def __init__(self, train_args): - super(LanguageModelTrainer, self).__init__(train_args) - def prepare_input(self, data_path): - pass - - -class ClassTrainer(BaseTrainer): +class ClassificationTrainer(BaseTrainer): """Trainer for classification.""" def __init__(self, train_args): - # super(ClassTrainer, self).__init__(train_args) - self.n_epochs = train_args["epochs"] - self.batch_size = train_args["batch_size"] - self.pickle_path = train_args["pickle_path"] - - if "validate" in train_args: - self.validate = train_args["validate"] - else: - self.validate = False + super(ClassificationTrainer, self).__init__(train_args) if "learn_rate" in train_args: self.learn_rate = train_args["learn_rate"] else: @@ -431,127 +323,14 @@ class ClassTrainer(BaseTrainer): self.momentum = train_args["momentum"] else: self.momentum = 0.9 - if "use_cuda" in train_args: - self.use_cuda = train_args["use_cuda"] - else: - self.use_cuda = True - self.model = None self.iterator = None self.loss_func = None self.optimizer = None - - def train(self, network): - """General Training Steps - :param network: a model - - The method is framework independent. 
- Work by calling the following methods: - - prepare_input - - mode - - define_optimizer - - data_forward - - get_loss - - grad_backward - - update - Subclasses must implement these methods with a specific framework. - """ - # prepare model and data, transfer model to gpu if available - if torch.cuda.is_available() and self.use_cuda: - self.model = network.cuda() - else: - self.model = network - data_train, data_dev, data_test, embedding = self.prepare_input( - self.pickle_path) - - # define tester over dev data - # valid_args = { - # "save_output": True, "validate_in_training": True, - # "save_dev_input": True, "save_loss": True, - # "batch_size": self.batch_size, "pickle_path": self.pickle_path} - # validator = POSTester(valid_args) - - # urn on network training mode, define loss and optimizer - self.define_loss() - self.define_optimizer() - self.mode(test=False) - - # main training epochs - start = time() - n_samples = len(data_train) - n_batches = n_samples // self.batch_size - n_print = n_batches // 10 - for epoch in range(self.n_epochs): - # prepare batch iterator - self.iterator = iter(Batchifier( - RandomSampler(data_train), self.batch_size, drop_last=False)) - - # training iterations in one epoch - step = 0 - for batch_x, batch_y in self.make_batch(data_train): - prediction = self.data_forward(network, batch_x) - - loss = self.get_loss(prediction, batch_y) - self.grad_backward(loss) - self.update() - - if step % n_print == 0: - acc = self.get_acc(prediction, batch_y) - end = time() - diff = timedelta(seconds=round(end - start)) - print("epoch: {:>3} step: {:>4} loss: {:>4.2}" - " train acc: {:>5.1%} time: {}".format( - epoch, step, loss, acc, diff)) - - step += 1 - - # if self.validate: - # if data_dev is None: - # raise RuntimeError("No validation data provided.") - # validator.test(network) - # print("[epoch {}]".format(epoch), end=" ") - # print(validator.show_matrices()) - - # finish training - - def prepare_input(self, data_path): - - names = [ - "data_train.pkl", "data_dev.pkl", - "data_test.pkl", "embedding.pkl"] - - files = [] - for name in names: - file_path = os.path.join(data_path, name) - if os.path.exists(file_path): - with open(file_path, 'rb') as f: - data = _pickle.load(f) - else: - data = [] - files.append(data) - - return tuple(files) - - def mode(self, test=False): - """ - Tell the network to be trained or not. - :param test: bool - """ - if test: - self.model.eval() - else: - self.model.train() + self.best_accuracy = 0 def define_loss(self): - """ - Assign an instance of loss function to self.loss_func - E.g. 
self.loss_func = nn.CrossEntropyLoss() - """ - if self.loss_func is None: - if hasattr(self.model, "loss"): - self.loss_func = self.model.loss - else: - self.loss_func = nn.CrossEntropyLoss() + self.loss_func = nn.CrossEntropyLoss() def define_optimizer(self): """ @@ -567,10 +346,6 @@ class ClassTrainer(BaseTrainer): logits = network(x) return logits - def get_loss(self, predict, truth): - """Calculate loss.""" - return self.loss_func(predict, truth) - def grad_backward(self, loss): """Compute gradient backward.""" self.model.zero_grad() @@ -580,30 +355,21 @@ class ClassTrainer(BaseTrainer): """Apply gradient.""" self.optimizer.step() - def make_batch(self, data): - """Batch and pad data.""" - for indices in self.iterator: - batch = [data[idx] for idx in indices] - batch_x = [sample[0] for sample in batch] - batch_y = [sample[1] for sample in batch] - batch_x = self.pad(batch_x) - - batch_x = torch.tensor(batch_x, dtype=torch.long) - batch_y = torch.tensor(batch_y, dtype=torch.long) - if torch.cuda.is_available() and self.use_cuda: - batch_x = batch_x.cuda() - batch_y = batch_y.cuda() - - yield batch_x, batch_y + def make_batch(self, iterator, data): + return Action.make_batch(iterator, data, output_length=False, use_cuda=self.use_cuda) def get_acc(self, y_logit, y_true): """Compute accuracy.""" y_pred = torch.argmax(y_logit, dim=-1) return int(torch.sum(y_true == y_pred)) / len(y_true) + def best_eval_result(self, validator): + _, _, accuracy = validator.metrics() + if accuracy > self.best_accuracy: + self.best_accuracy = accuracy + return True + else: + return False -if __name__ == "__name__": - train_args = {"epochs": 1, "validate": False, "batch_size": 3, "pickle_path": "./"} - trainer = BaseTrainer(train_args) - data_train = [[[1, 2, 3, 4], [0]] * 10] + [[[1, 3, 5, 2], [1]] * 10] - trainer.make_batch(data=data_train) + def _create_validator(self, valid_args): + return ClassificationTester(valid_args) diff --git a/fastNLP/models/cnn_text_classification.py b/fastNLP/models/cnn_text_classification.py index 76a6cd97..66bb5ecc 100644 --- a/fastNLP/models/cnn_text_classification.py +++ b/fastNLP/models/cnn_text_classification.py @@ -1,13 +1,14 @@ # python: 3.6 # encoding: utf-8 +import torch import torch.nn as nn + # import torch.nn.functional as F -from fastNLP.models.base_model import BaseModel from fastNLP.modules.encoder.conv_maxpool import ConvMaxpool -class CNNText(BaseModel): +class CNNText(torch.nn.Module): """ Text classification model by character CNN, the implementation of paper 'Yoon Kim. 2014. Convolution Neural Networks for Sentence diff --git a/fastNLP/models/sequence_modeling.py b/fastNLP/models/sequence_modeling.py index 77a1f1d2..b28ef604 100644 --- a/fastNLP/models/sequence_modeling.py +++ b/fastNLP/models/sequence_modeling.py @@ -1,7 +1,7 @@ import torch from fastNLP.models.base_model import BaseModel -from fastNLP.modules import decoder, encoder, utils +from fastNLP.modules import decoder, encoder class SeqLabeling(BaseModel): @@ -34,46 +34,25 @@ class SeqLabeling(BaseModel): # [batch_size, max_len, num_classes] return x - def loss(self, x, y, seq_length): + def loss(self, x, y, mask): """ Negative log likelihood loss. - :param x: FloatTensor, [batch_size, max_len, tag_size] - :param y: LongTensor, [batch_size, max_len] - :param seq_length: list of int. 
[batch_size] + :param x: Tensor, [batch_size, max_len, tag_size] + :param y: Tensor, [batch_size, max_len] + :param mask: ByteTensor, [batch_size, ,max_len] :return loss: a scalar Tensor """ x = x.float() y = y.long() - - batch_size = x.size(0) - max_len = x.size(1) - - mask = utils.seq_mask(seq_length, max_len) - mask = mask.byte().view(batch_size, max_len) - - # TODO: remove - if torch.cuda.is_available(): - mask = mask.cuda() - # mask = x.new(batch_size, max_len) - total_loss = self.Crf(x, y, mask) - return torch.mean(total_loss) - def prediction(self, x, seq_length): + def prediction(self, x, mask): """ :param x: FloatTensor, [batch_size, max_len, tag_size] - :param seq_length: int - :return prediction: list of tuple of (decode path(list), best score) + :param mask: ByteTensor, [batch_size, max_len] + :return prediction: list of [decode path(list)] """ - x = x.float() - max_len = x.size(1) - - mask = utils.seq_mask(seq_length, max_len) - # hack: make sure mask has the same device as x - mask = mask.to(x).byte() - tag_seq = self.Crf.viterbi_decode(x, mask) - return tag_seq diff --git a/fastNLP/modules/decoder/CRF.py b/fastNLP/modules/decoder/CRF.py index 5d8ce852..e6327ec0 100644 --- a/fastNLP/modules/decoder/CRF.py +++ b/fastNLP/modules/decoder/CRF.py @@ -132,6 +132,7 @@ class ConditionalRandomField(nn.Module): Given a feats matrix, return best decode path and best score. :param feats: :param masks: + :param get_score: bool, whether to output the decode score. :return:List[Tuple(List, float)], """ batch_size, max_len, tag_size = feats.size() diff --git a/reproduction/chinese_word_seg/cws_train.py b/reproduction/chinese_word_seg/cws_train.py index ff549eb9..0a235be0 100644 --- a/reproduction/chinese_word_seg/cws_train.py +++ b/reproduction/chinese_word_seg/cws_train.py @@ -3,12 +3,12 @@ import sys sys.path.append("..") from fastNLP.loader.config_loader import ConfigLoader, ConfigSection -from fastNLP.core.trainer import POSTrainer +from fastNLP.core.trainer import SeqLabelTrainer from fastNLP.loader.dataset_loader import TokenizeDatasetLoader, BaseLoader from fastNLP.loader.preprocess import POSPreprocess, load_pickle from fastNLP.saver.model_saver import ModelSaver from fastNLP.loader.model_loader import ModelLoader -from fastNLP.core.tester import POSTester +from fastNLP.core.tester import SeqLabelTester from fastNLP.models.sequence_modeling import SeqLabeling from fastNLP.core.inference import Inference @@ -64,7 +64,7 @@ def train(): train_args["num_classes"] = p.num_classes # Trainer - trainer = POSTrainer(train_args) + trainer = SeqLabelTrainer(train_args) # Model model = SeqLabeling(train_args) @@ -96,7 +96,7 @@ def test(): ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS_test": test_args}) # Tester - tester = POSTester(test_args) + tester = SeqLabelTester(test_args) # Start testing tester.test(model) diff --git a/test/data_for_tests/people.txt b/test/data_for_tests/people.txt index f34c85cb..e4909679 100644 --- a/test/data_for_tests/people.txt +++ b/test/data_for_tests/people.txt @@ -64,4 +64,90 @@ 3 B-t 1 M-t 日 E-t -, S-w \ No newline at end of file +, S-w +迈 B-v +向 E-v +充 B-v +满 E-v +希 B-n +望 E-n +的 S-u +新 S-a +世 B-n +纪 E-n +— B-w +— E-w +一 B-t +九 M-t +九 M-t +八 M-t +年 E-t +新 B-t +年 E-t +讲 B-n +话 E-n +( S-w +附 S-v +图 B-n +片 E-n +1 S-m +张 S-q +) S-w + +迈 B-v +向 E-v +充 B-v +满 E-v +希 B-n +望 E-n +的 S-u +新 S-a +世 B-n +纪 E-n +— B-w +— E-w +一 B-t +九 M-t +九 M-t +八 M-t +年 E-t +新 B-t +年 E-t +讲 B-n +话 E-n +( S-w +附 S-v +图 B-n +片 E-n +1 S-m +张 S-q +) S-w + +迈 B-v 
+向 E-v +充 B-v +满 E-v +希 B-n +望 E-n +的 S-u +新 S-a +世 B-n +纪 E-n +— B-w +— E-w +一 B-t +九 M-t +九 M-t +八 M-t +年 E-t +新 B-t +年 E-t +讲 B-n +话 E-n +( S-w +附 S-v +图 B-n +片 E-n +1 S-m +张 S-q +) S-w \ No newline at end of file diff --git a/test/data_for_tests/text_classify.txt b/test/data_for_tests/text_classify.txt new file mode 100644 index 00000000..24a51ce9 --- /dev/null +++ b/test/data_for_tests/text_classify.txt @@ -0,0 +1,100 @@ +entertainment 台 媒 预 测 周 冬 雨 金 马 奖 封 后 , 大 气 的 倪 妮 却 佳 作 难 出 +food 农 村 就 是 好 , 能 吃 到 纯 天 然 无 添 加 的 野 生 蜂 蜜 , 营 养 又 健 康 +fashion 1 4 款 知 性 美 装 , 时 尚 惊 艳 搁 浅 的 阳 光 轻 熟 的 优 雅 +history 火 焰 喷 射 器 1 0 0 0 度 火 焰 烧 死 鬼 子 4 连 拍 +society 1 8 岁 青 年 砍 死 8 8 岁 老 兵 +fashion 醋 洗 脸 的 正 确 方 法 洗 对 了 不 仅 美 容 肌 肤 还 能 收 缩 毛 孔 +game 大 家 都 说 说 除 了 这 1 0 个 英 雄 , L O L 还 有 哪 些 英 雄 可 以 单 挑 男 爵 +sports 王 仕 鹏 退 役 担 任 N B A 总 决 赛 现 场 解 说 嘉 宾 +regimen 天 天 吃 “ 洋 快 餐 ” , 5 岁 女 童 患 上 肝 炎 +food 汤 里 的 蛋 花 怎 样 才 能 如 花 朵 般 漂 亮 , 注 意 这 一 点 即 可 ! +tech 英 退 休 人 士 把 谷 歌 当 活 人 以 礼 貌 搜 索 请 求 征 服 整 个 互 联 网 +discovery N A S A 探 测 器 拍 摄 地 球 、 火 星 和 冥 王 星 合 影 +society 当 骗 子 遇 上 撒 贝 宁 ! 几 句 话 过 后 骗 子 赔 礼 道 歉 . . . . . +history 红 军 长 征 在 中 国 革 命 史 上 的 地 位 +world 实 拍 神 秘 之 国 , 带 你 走 进 真 实 的 朝 鲜 +tech 逼 格 爆 表 ! 古 文 版 2 0 1 6 网 络 热 词 : 燃 尽 洪 荒 之 力 +story 因 为 一 样 东 西 这 个 后 娘 竟 然 给 孩 子 磕 头 +game L O L : 皮 肤 对 操 作 没 影 响 ? 细 数 那 些 有 加 成 效 果 的 皮 肤 +fashion 冬 天 想 穿 裙 子 又 怕 冷 ? 学 了 这 些 搭 配 就 能 好 看 又 温 暖 ! +entertainment 贾 建 军 少 林 三 光 剑 视 频 +food 再 也 不 用 出 去 吃 羊 肉 串 , 自 己 做 又 卫 生 又 健 康 +regimen 男 人 多 吃 这 几 道 菜 , 效 果 胜 “ 伟 哥 ” +baby 宝 贝 厨 房 丨 肉 类 辅 食 第 一 步 宝 宝 的 生 长 发 育 每 天 都 离 不 开 它 ! +travel 近 8 0 亿 的 顶 级 豪 华 邮 轮 上 到 底 有 什 么 ? +sports 厄 齐 尔 心 中 最 想 签 约 的 三 个 人 +food 东 北 的 粘 豆 包 啊 , 想 死 你 们 了 ! +military 强 军 足 音 +sports 奥 运 赛 场 上 , 被 喷 子 痛 批 的 十 大 知 名 运 动 员 +game 老 玩 家 分 享 对 2 0 1 6 L P L 夏 季 赛 R N G 的 分 析 +military 揭 秘 : 关 于 战 争 的 五 大 真 相 , 不 要 再 被 影 视 所 欺 骗 了 ! +food 小 丫 厨 房 : 夏 天 怎 么 吃 辣 不 长 痘 ? 告 诉 你 火 锅 鸡 、 香 辣 鱼 的 正 确 做 法 +travel 中 国 首 个 内 陆 城 市 群 上 的 9 座 城 市 , 看 看 有 你 的 家 乡 吗 +fashion 李 小 璐 做 榜 样 接 亲 吻 脚 大 流 行 新 娘 玉 足 怎 样 才 有 好 味 道 ? +game 黄 金 吊 打 钻 石 ? L O L 最 强 刷 钱 毒 瘤 打 法 诞 生 +history 奇 事 ! 上 万 只 青 蛙 拦 路 告 状 , 竟 然 牵 扯 出 一 桩 命 案 +baby 奶 奶 , 你 为 什 么 不 让 我 用 尿 不 湿 +game L O L 当 5 个 大 发 明 家 炮 台 围 住 泉 水 的 时 候 : 这 是 真 虐 泉 ! +essay 文 友 忠 告 暖 人 心 : 人 到 中 年 “ 不 交 五 友 ” +travel 这 一 年 , 我 们 去 日 本 +food 好 吃 早 饭 近 似 吃 补 药 +fashion 夏 天 太 热 , 唇 膏 化 了 如 何 办 ? +society 厂 里 面 的 9 0 后 打 工 妹 , 辛 苦 来 之 不 易 +history 罕 见 老 照 片 展 示 美 国 大 萧 条 时 期 景 象 +world 美 国 总 统 奥 巴 马 , 是 童 心 未 泯 的 温 情 奥 大 大 , 还 是 个 超 级 老 顽 童 +finance 脱 欧 公 投 前 一 天 抛 售 英 镑 这 一 次 索 罗 斯 也 被 “ 打 败 ” 了 . . . +history 翻 越 长 征 路 上 第 一 座 大 山 +world 朝 鲜 批 奥 巴 马 涉 朝 言 论 , 称 只 要 核 威 胁 存 在 将 继 续 强 化 核 武 力 量 +game 《 巫 师 3 : 狂 猎 》 不 良 因 素 解 析 攻 略 +travel 在 郑 州 有 个 地 方 , 时 光 仿 佛 在 那 儿 停 下 脚 步 +history 它 号 称 “ 天 下 第 一 团 ” , 走 出 过 1 4 位 共 和 国 将 军 以 及 一 位 著 名 作 家 +car 煤 老 板 去 黄 江 买 车 , 以 为 占 了 便 宜 没 想 被 坑 了 1 0 0 多 万 +society “ 试 管 婴 儿 之 母 ” 张 丽 珠 遗 体 告 别 仪 式 8 日 举 行 +sports 东 京 奥 运 会 , 中 国 女 排 卫 冕 的 几 率 有 多 大 ? +travel 成 都 我 们 永 远 依 恋 的 城 市 +tech 雷 布 斯 除 了 小 米 还 有 这 些 秘 密 , 你 知 道 吗 ? +world “ 仲 裁 庭 损 害 国 际 法 体 系 公 正 性 ” — — 访 武 汉 大 学 中 国 边 界 与 海 洋 研 究 院 首 席 专 家 易 显 河 +entertainment 上 海 观 众 和 欧 洲 三 大 影 展 之 间 的 距 离 : 零 时 差 +essay 关 系 好 , 一 切 便 好 +baby 刚 出 生 不 到 1 小 时 的 白 鲸 宝 宝 被 冲 上 岸 , 被 救 后 对 恩 人 露 出 微 笑 +tech 赚 足 眼 球 , 诺 基 亚 五 边 形 W i n 1 0 M o b i l e 概 念 手 机 : 棱 镜 +essay 2 4 句 经 典 语 录 : 穷 三 年 可 以 怨 命 , 穷 十 年 就 得 自 省 +food 这 道 菜 真 下 饭 ! 做 法 简 单 , 防 辐 射 、 抗 衰 老 , 关 键 还 便 宜 +entertainment 《 继 承 者 们 》 要 拍 中 国 版 , 众 角 色 你 期 待 谁 来 演 ? 
+game D N F 暴 走 改 版 后 怎 么 样 D N F 暴 走 改 版 红 眼 变 弱 了 吗 +entertainment 郑 佩 佩 自 曝 与 李 小 龙 的 过 去 他 是 个 “ 疯 子 ” +baby 女 性 只 有 8 4 次 最 佳 受 孕 机 会 +travel 月 初 一 个 人 去 了 日 本 . . +military 不 为 人 知 的 8 0 万 苏 联 女 兵 ! 最 后 一 张 很 美 ! +tech 网 络 商 家 提 供 小 米 5 运 存 升 级 服 务 : 3 G B 秒 变 6 G B +history 宋 太 祖 、 宋 太 宗 凌 辱 亡 国 皇 后 , 徽 钦 二 帝 后 宫 被 金 人 凌 辱 +history 人 有 三 面 最 “ 难 吃 ” ! 黑 帮 大 佬 杜 月 笙 论 江 湖 规 矩 ! 一 生 只 怕 这 一 人 +game 来 了 ! 索 尼 P S 4 独 占 大 作 《 战 神 4 》 正 式 公 布 +discovery 延 时 视 频 显 示 珊 瑚 如 何 “ 驱 逐 ” 共 生 藻 类 +car 传 祺 G A 8 和 东 风 A 9 谁 才 是 自 主 “ 豪 车 ” 大 佬 +fashion 娶 老 婆 就 要 娶 这 种 ! 蒋 欣 这 样 微 胖 的 女 人 好 看 又 实 用 +sports 黄 山 姑 娘 吕 秀 芝 勇 夺 奥 运 铜 牌 数 百 父 老 彻 夜 为 她 加 油 +military [ 每 日 军 图 ] 土 豪 补 仓 ! 沙 特 再 次 购 买 上 百 辆 美 国 M 1 A 2 主 战 坦 克 +military 美 军 这 款 武 器 号 称 能 让 半 个 中 国 陷 入 黑 暗 , 解 放 军 少 将 : 我 们 也 有 +world 邓 小 平 与 日 本 天 皇 的 历 史 性 会 谈 , 对 中 日 两 国 都 具 有 深 远 的 意 义 啊 ! +baby 为 什 么 有 人 上 个 厕 所 都 能 生 出 孩 子 ? +fashion 欣 宜 举 行 首 次 个 唱 十 万 颗 宝 仕 奥 莎 仿 水 晶 闪 耀 全 场 +food 小 两 口 上 周 的 晚 餐 +society 在 北 京 就 要 守 规 矩 +entertainment 知 情 人 曝 翰 爽 分 手 内 幕 : 郑 爽 想 结 婚 却 被 一 直 拖 着 +military 中 国 反 舰 导 弹 世 界 第 一 远 远 超 过 美 国 但 为 何 却 还 不 如 俄 罗 斯 ? +entertainment 他 除 了 是 《 我 歌 》 音 乐 总 监 , 还 曾 组 乐 队 玩 摇 滚 , 是 黄 家 驹 旧 日 知 己 +baby 长 鹅 口 疮 的 孩 子 怎 么 照 顾 ? 不 要 再 说 拿 他 没 办 法 了 ! +discovery 微 重 力 不 需 使 用 肌 肉 , 太 空 人 返 回 地 球 后 脊 椎 旁 肌 肉 萎 缩 约 1 9 % +regimen 这 6 种 人 将 来 会 得 老 年 痴 呆 ! 预 防 老 年 痴 呆 症 , 这 些 办 法 被 全 世 界 公 认 +society 2 0 1 6 年 上 海 即 将 发 生 哪 些 大 事 件 。 。 。 。 +car 北 汽 自 主 品 牌 亏 损 3 3 . 4 1 亿 额 外 促 销 成 主 因 +car 在 那 山 的 那 边 海 的 那 边 , 有 一 群 自 由 侠 +history 一 个 小 城 就 屠 杀 了 4 0 0 0 苏 军 战 俘 , 希 特 勒 死 神 战 队 的 崛 起 与 覆 灭 +baby 给 孩 子 洗 澡 时 , 这 些 部 位 再 脏 也 不 要 碰 ! +essay 好 久 不 见 , 你 还 好 么 +baby 被 娃 误 伤 的 9 种 痛 , 数 一 数 你 中 了 几 枪 ? +food 初 秋 的 小 炖 品 放 冰 糖 就 比 较 滋 润 , 放 红 糖 就 补 血 又 不 燥 热 +game 佩 服 佩 服 ! 羊 驼 D e f t 单 排 重 回 韩 服 最 强 王 者 第 一 名 ! +game 三 个 时 代 的 标 志 炉 石 传 说 三 大 远 古 毒 瘤 卡 组 +discovery 2 0 世 纪 最 伟 大 科 学 发 现 — — 魔 术 般 的 超 导 材 料 ! 
\ No newline at end of file diff --git a/test/test_seq_labeling.py b/test/seq_labeling.py similarity index 87% rename from test/test_seq_labeling.py rename to test/seq_labeling.py index 2bc2a899..db171215 100644 --- a/test/test_seq_labeling.py +++ b/test/seq_labeling.py @@ -3,14 +3,14 @@ import sys sys.path.append("..") from fastNLP.loader.config_loader import ConfigLoader, ConfigSection -from fastNLP.core.trainer import POSTrainer +from fastNLP.core.trainer import SeqLabelTrainer from fastNLP.loader.dataset_loader import POSDatasetLoader, BaseLoader from fastNLP.loader.preprocess import POSPreprocess, load_pickle from fastNLP.saver.model_saver import ModelSaver from fastNLP.loader.model_loader import ModelLoader -from fastNLP.core.tester import POSTester +from fastNLP.core.tester import SeqLabelTester from fastNLP.models.sequence_modeling import SeqLabeling -from fastNLP.core.inference import Inference +from fastNLP.core.inference import SeqLabelInfer data_name = "people.txt" data_path = "data_for_tests/people.txt" @@ -50,14 +50,15 @@ def infer(): """ # Inference interface - infer = Inference(pickle_path) + infer = SeqLabelInfer(pickle_path) results = infer.predict(model, infer_data) - print(results) + for res in results: + print(res) print("Inference finished!") -def train_test(): +def train_and_test(): # Config Loader train_args = ConfigSection() ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS": train_args}) @@ -67,12 +68,12 @@ def train_test(): train_data = pos_loader.load_lines() # Preprocessor - p = POSPreprocess(train_data, pickle_path) + p = POSPreprocess(train_data, pickle_path, train_dev_split=0.5) train_args["vocab_size"] = p.vocab_size train_args["num_classes"] = p.num_classes # Trainer - trainer = POSTrainer(train_args) + trainer = SeqLabelTrainer(train_args) # Model model = SeqLabeling(train_args) @@ -100,7 +101,7 @@ def train_test(): ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS_test": test_args}) # Tester - tester = POSTester(test_args) + tester = SeqLabelTester(test_args) # Start testing tester.test(model) @@ -111,5 +112,5 @@ def train_test(): if __name__ == "__main__": - train_test() - # infer() + # train_and_test() + infer() diff --git a/test/test_cws.py b/test/test_cws.py index 8f6c1211..f293aefd 100644 --- a/test/test_cws.py +++ b/test/test_cws.py @@ -3,12 +3,12 @@ import sys sys.path.append("..") from fastNLP.loader.config_loader import ConfigLoader, ConfigSection -from fastNLP.core.trainer import POSTrainer +from fastNLP.core.trainer import SeqLabelTrainer from fastNLP.loader.dataset_loader import TokenizeDatasetLoader, BaseLoader from fastNLP.loader.preprocess import POSPreprocess, load_pickle from fastNLP.saver.model_saver import ModelSaver from fastNLP.loader.model_loader import ModelLoader -from fastNLP.core.tester import POSTester +from fastNLP.core.tester import SeqLabelTester from fastNLP.models.sequence_modeling import SeqLabeling from fastNLP.core.inference import Inference @@ -73,7 +73,7 @@ def train_test(): train_args["num_classes"] = p.num_classes # Trainer - trainer = POSTrainer(train_args) + trainer = SeqLabelTrainer(train_args) # Model model = SeqLabeling(train_args) @@ -101,7 +101,7 @@ def train_test(): ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS_test": test_args}) # Tester - tester = POSTester(test_args) + tester = SeqLabelTester(test_args) # Start testing tester.test(model) @@ -113,4 +113,4 @@ def train_test(): if __name__ == "__main__": train_test() - #infer() 
+ infer() diff --git a/test/test_tester.py b/test/test_tester.py index dd1e6c23..9a3d949e 100644 --- a/test/test_tester.py +++ b/test/test_tester.py @@ -1,4 +1,4 @@ -from fastNLP.core.tester import POSTester +from fastNLP.core.tester import SeqLabelTester from fastNLP.loader.config_loader import ConfigSection, ConfigLoader from fastNLP.loader.dataset_loader import TokenizeDatasetLoader from fastNLP.loader.preprocess import POSPreprocess @@ -26,7 +26,7 @@ def foo(): valid_args = {"save_output": True, "validate_in_training": True, "save_dev_input": True, "save_loss": True, "batch_size": 8, "pickle_path": "./data_for_tests/", "use_cuda": True} - validator = POSTester(valid_args) + validator = SeqLabelTester(valid_args) print("start validation.") validator.test(model) diff --git a/test/text_classify.py b/test/text_classify.py new file mode 100644 index 00000000..f18d4a38 --- /dev/null +++ b/test/text_classify.py @@ -0,0 +1,84 @@ +# Python: 3.5 +# encoding: utf-8 + +import os + +from fastNLP.core.inference import ClassificationInfer +from fastNLP.core.trainer import ClassificationTrainer +from fastNLP.loader.dataset_loader import ClassDatasetLoader +from fastNLP.loader.model_loader import ModelLoader +from fastNLP.loader.preprocess import ClassPreprocess +from fastNLP.models.cnn_text_classification import CNNText +from fastNLP.saver.model_saver import ModelSaver + +data_dir = "./data_for_tests/" +train_file = 'text_classify.txt' +model_name = "model_class.pkl" + + +def infer(): + # load dataset + print("Loading data...") + ds_loader = ClassDatasetLoader("train", os.path.join(data_dir, train_file)) + data = ds_loader.load() + unlabeled_data = [x[0] for x in data] + + # pre-process data + pre = ClassPreprocess(data_dir) + vocab_size, n_classes = pre.process(data, "data_train.pkl") + print("vocabulary size:", vocab_size) + print("number of classes:", n_classes) + + # construct model + print("Building model...") + cnn = CNNText(class_num=n_classes, embed_num=vocab_size) + # Dump trained parameters into the model + ModelLoader.load_pytorch(cnn, "./data_for_tests/saved_model.pkl") + print("model loaded!") + + infer = ClassificationInfer(data_dir) + results = infer.predict(cnn, unlabeled_data) + print(results) + + +def train(): + # load dataset + print("Loading data...") + ds_loader = ClassDatasetLoader("train", os.path.join(data_dir, train_file)) + data = ds_loader.load() + print(data[0]) + + # pre-process data + pre = ClassPreprocess(data_dir) + vocab_size, n_classes = pre.process(data, "data_train.pkl") + print("vocabulary size:", vocab_size) + print("number of classes:", n_classes) + + # construct model + print("Building model...") + cnn = CNNText(class_num=n_classes, embed_num=vocab_size) + + # train + print("Training...") + train_args = { + "epochs": 1, + "batch_size": 10, + "pickle_path": data_dir, + "validate": False, + "save_best_dev": False, + "model_saved_path": "./data_for_tests/", + "use_cuda": True + } + trainer = ClassificationTrainer(train_args) + trainer.train(cnn) + + print("Training finished!") + + saver = ModelSaver("./data_for_tests/saved_model.pkl") + saver.save_pytorch(cnn) + print("Model saved!") + + +if __name__ == "__main__": + # train() + infer()
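A closing note on the mask convention these changes converge on: SeqLabelTrainer, SeqLabelTester, and SeqLabelInfer now all derive a CRF mask from the pre-padding lengths instead of passing seq_len around. In isolation the recipe is (a sketch; utils.seq_mask comes from fastNLP.modules):

    from fastNLP.modules import utils

    seq_len = [3, 1]   # true lengths before padding
    max_len = 4        # width of the padded batch
    mask = utils.seq_mask(seq_len, max_len)
    mask = mask.byte().view(len(seq_len), max_len)
    # mask[i, j] == 1 iff position j lies inside sequence i; the mask is what
    # SeqLabeling.loss and SeqLabeling.prediction now expect instead of seq_len.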