diff --git a/fastNLP/core/action.py b/fastNLP/core/action.py
index 17680216..d35a787c 100644
--- a/fastNLP/core/action.py
+++ b/fastNLP/core/action.py
@@ -1,16 +1,129 @@
+"""
+    This file defines Action(s) and sample methods.
+
+"""
 from collections import Counter
+import torch
 import numpy as np
+import _pickle
 
 
 class Action(object):
     """
-        base class for Trainer and Tester
+        Operations shared by Trainer, Tester, and Inference.
+        This is designed to reduce duplicated code.
+            - prepare_input: data preparation before a forward pass.
+            - make_batch: produce a mini-batch of data. @staticmethod
+            - pad: padding method used in sequence modeling. @staticmethod
+            - mode: switch the network between train and test mode. (for PyTorch) @staticmethod
+            - data_forward: a forward pass of the network.
+        The base Action shall define operations shared by as many task-specific Actions as possible.
     """
 
     def __init__(self):
         super(Action, self).__init__()
 
+    @staticmethod
+    def make_batch(iterator, data, output_length=True):
+        """
+        1. Perform batching over data and produce a batch of training data.
+        2. Add padding.
+        :param iterator: an iterator (an object implementing __next__) that returns the indices of the next sample batch.
+        :param data: list. Each entry is a sample, which is also a list of features and label(s).
+            E.g.
+                [
+                    [[word_11, word_12, word_13], [label_11, label_12]],  # sample 1
+                    [[word_21, word_22, word_23], [label_21, label_22]],  # sample 2
+                    ...
+                ]
+        :param output_length: whether to output the original length of each sequence before padding.
+        :return (batch_x, seq_len), batch_y: if output_length is True.
+                    batch_x: list. Each entry is a list of features of a sample. [batch_size, max_len]
+                    seq_len: list. The original (pre-padding) length of each sequence.
+                    batch_y: list. Each entry is a list of labels of a sample. [batch_size, num_labels]
+
+                batch_x, batch_y: if output_length is False.
+        """
+        indices = next(iterator)
+        batch = [data[idx] for idx in indices]
+        batch_x = [sample[0] for sample in batch]
+        batch_y = [sample[1] for sample in batch]
+        seq_len = [len(x) for x in batch_x]  # record lengths before pad() modifies batch_x in place
+        batch_x_pad = Action.pad(batch_x)
+        batch_y_pad = Action.pad(batch_y)
+        if output_length:
+            return (batch_x_pad, seq_len), batch_y_pad
+        else:
+            return batch_x_pad, batch_y_pad
+
+    @staticmethod
+    def pad(batch, fill=0):
+        """
+        Pad a batch of samples to the maximum length of this batch.
+        :param batch: list of list
+        :param fill: word index to pad with, default 0.
+        :return: a padded batch
+        """
+        max_length = max([len(x) for x in batch])
+        for idx, sample in enumerate(batch):
+            if len(sample) < max_length:
+                batch[idx] = sample + ([fill] * (max_length - len(sample)))
+        return batch
+
+    @staticmethod
+    def mode(model, test=False):
+        """
+        Train mode or Test mode. This is for PyTorch currently.
+        :param model: a PyTorch model
+        :param test: bool, True to switch to evaluation mode, False to switch to training mode
+        """
+        if test:
+            model.eval()
+        else:
+            model.train()
+
+    def data_forward(self, network, x):
+        """
+        Forward pass of the data.
+        :param network: a model
+        :param x: input feature matrix and label vector
+        :return: output of the model
+
+        For PyTorch, just do "network(*x)"
+        """
+        raise NotImplementedError
+
+
+class SeqLabelAction(Action):
+    def __init__(self, action_args):
+        """
+        Define task-specific member variables.
+        :param action_args: a dict-like object that provides "use_cuda"
+        """
+        super(SeqLabelAction, self).__init__()
+        self.max_len = None
+        self.mask = None
+        self.best_accuracy = 0.0
+        self.use_cuda = action_args["use_cuda"]
+        self.seq_len = None
+        self.batch_size = None
+
+    def data_forward(self, network, inputs):
+        # unpack the returned value from make_batch
+        if isinstance(inputs, tuple):
+            x = inputs[0]
+            self.seq_len = inputs[1]
+        else:
+            x = inputs
+        x = torch.Tensor(x).long()
+        if torch.cuda.is_available() and self.use_cuda:
+            x = x.cuda()
+        self.batch_size = x.size(0)
+        self.max_len = x.size(1)
+        y = network(x)
+        return y
+
 
 def k_means_1d(x, k, max_iter=100):
     """
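
A minimal sketch of how the new static helpers compose, on toy data (RandomSampler and Batchifier are the existing samplers in fastNLP.core.action; the row order of the batch depends on the random sampler):

    from fastNLP.core.action import Action, RandomSampler, Batchifier

    data = [
        [[1, 2, 3], [1, 0]],   # sample 1: word indices, then label indices
        [[4, 5], [0, 1]],      # sample 2 is shorter and gets padded
    ]
    iterator = iter(Batchifier(RandomSampler(data), 2, drop_last=True))
    (batch_x, seq_len), batch_y = Action.make_batch(iterator, data, output_length=True)
    # batch_x is padded to the longest sample of this batch, e.g. [[1, 2, 3], [4, 5, 0]]
    # seq_len keeps the pre-padding lengths, e.g. [3, 2]; batch_y is padded the same way
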
diff --git a/fastNLP/core/tester.py b/fastNLP/core/tester.py
index e45f1017..8ee2ded6 100644
--- a/fastNLP/core/tester.py
+++ b/fastNLP/core/tester.py
@@ -11,11 +11,12 @@ from fastNLP.core.action import RandomSampler, Batchifier
 class BaseTester(Action):
     """docstring for Tester"""
 
-    def __init__(self, test_args):
+    def __init__(self, test_args, action):
         """
         :param test_args: a dict-like object that has __getitem__ method, can be accessed by "test_args["key_str"]"
         """
         super(BaseTester, self).__init__()
+        self.action = action
         self.validate_in_training = test_args["validate_in_training"]
         self.save_dev_data = None
         self.save_output = test_args["save_output"]
@@ -38,18 +39,21 @@ class BaseTester(Action):
         self.model = network
 
         # turn on the testing mode; clean up the history
-        self.mode(network, test=True)
+        self.action.mode(network, test=True)
+        self.eval_history.clear()
+        self.batch_output.clear()
 
         dev_data = self.prepare_input(self.pickle_path)
 
-        self.iterator = iter(Batchifier(RandomSampler(dev_data), self.batch_size, drop_last=True))
+        iterator = iter(Batchifier(RandomSampler(dev_data), self.batch_size, drop_last=True))
 
         num_iter = len(dev_data) // self.batch_size
 
         for step in range(num_iter):
-            batch_x, batch_y = self.make_batch(dev_data)
+            batch_x, batch_y = self.action.make_batch(iterator, dev_data)
+
+            prediction = self.action.data_forward(network, batch_x)
 
-            prediction = self.data_forward(network, batch_x)
             eval_results = self.evaluate(prediction, batch_y)
 
             if self.save_output:
@@ -64,53 +68,10 @@ class BaseTester(Action):
         :return save_dev_data: list. Each entry is a sample, which is also a list of features and label(s).
         """
         if self.save_dev_data is None:
-            data_dev = _pickle.load(open(data_path + "/data_dev.pkl", "rb"))
+            data_dev = _pickle.load(open(data_path + "data_dev.pkl", "rb"))
             self.save_dev_data = data_dev
         return self.save_dev_data
 
-    def make_batch(self, data, output_length=True):
-        """
-        1. Perform batching from data and produce a batch of training data.
-        2. Add padding.
-        :param data: list. Each entry is a sample, which is also a list of features and label(s).
-            E.g.
-                [
-                    [[word_11, word_12, word_13], [label_11. label_12]],  # sample 1
-                    [[word_21, word_22, word_23], [label_21. label_22]],  # sample 2
-                    ...
-                ]
-        :return batch_x: list. Each entry is a list of features of a sample. [batch_size, max_len]
-                batch_y: list. Each entry is a list of labels of a sample. [batch_size, num_labels]
-        """
-        indices = next(self.iterator)
-        batch = [data[idx] for idx in indices]
-        batch_x = [sample[0] for sample in batch]
-        batch_y = [sample[1] for sample in batch]
-        batch_x_pad = self.pad(batch_x)
-        batch_y_pad = self.pad(batch_y)
-        if output_length:
-            seq_len = [len(x) for x in batch_x]
-            return (batch_x_pad, seq_len), batch_y_pad
-        else:
-            return batch_x_pad, batch_y_pad
-
-    @staticmethod
-    def pad(batch, fill=0):
-        """
-        Pad a batch of samples to maximum length.
-        :param batch: list of list
-        :param fill: word index to pad, default 0.
-        :return: a padded batch
-        """
-        max_length = max([len(x) for x in batch])
-        for idx, sample in enumerate(batch):
-            if len(sample) < max_length:
-                batch[idx] = sample + ([fill] * (max_length - len(sample)))
-        return batch
-
-    def data_forward(self, network, data):
-        raise NotImplementedError
-
     def evaluate(self, predict, truth):
         raise NotImplementedError
 
@@ -118,14 +79,6 @@ class BaseTester(Action):
     def metrics(self):
         raise NotImplementedError
 
-    def mode(self, model, test=True):
-        """TODO: combine this function with Trainer ?? """
-        if test:
-            model.eval()
-        else:
-            model.train()
-        self.eval_history.clear()
-
     def show_matrices(self):
         """
         This is called by Trainer to print evaluation on dev set.
@@ -139,43 +92,21 @@ class POSTester(BaseTester):
     Tester for sequence labeling.
     """
 
-    def __init__(self, test_args):
+    def __init__(self, test_args, action):
         """
         :param test_args: a dict-like object that has __getitem__ method, can be accessed by "test_args["key_str"]"
         """
-        super(POSTester, self).__init__(test_args)
+        super(POSTester, self).__init__(test_args, action)
         self.max_len = None
         self.mask = None
         self.batch_result = None
 
-    def data_forward(self, network, inputs):
-        """TODO: combine with Trainer
-
-        :param network: the PyTorch model
-        :param x: list of list, [batch_size, max_len]
-        :return y: [batch_size, num_classes]
-        """
-        # unpack the returned value from make_batch
-        if isinstance(inputs, tuple):
-            x = inputs[0]
-            self.seq_len = inputs[1]
-        else:
-            x = inputs
-        x = torch.Tensor(x).long()
-        if torch.cuda.is_available() and self.use_cuda:
-            x = x.cuda()
-        self.batch_size = x.size(0)
-        self.max_len = x.size(1)
-
-        y = network(x)
-        return y
-
     def evaluate(self, predict, truth):
         truth = torch.Tensor(truth)
         if torch.cuda.is_available() and self.use_cuda:
             truth = truth.cuda()
-        loss = self.model.loss(predict, truth, self.seq_len) / self.batch_size
-        prediction = self.model.prediction(predict, self.seq_len)
+        loss = self.model.loss(predict, truth, self.action.seq_len) / self.batch_size
+        prediction = self.model.prediction(predict, self.action.seq_len)
         results = torch.Tensor(prediction).view(-1,)
         if torch.cuda.is_available() and self.use_cuda:
             results = results.cuda()
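
A sketch of how the refactored tester is now constructed, given a trained SeqLabeling model; the keys mirror the default_valid_args dict built in trainer.py, and the values here are illustrative:

    from fastNLP.core.action import SeqLabelAction
    from fastNLP.core.tester import POSTester

    test_args = {"save_output": True, "validate_in_training": True, "save_dev_input": True,
                 "save_loss": True, "batch_size": 8, "use_cuda": True,
                 "pickle_path": "./data_for_tests/"}  # keep the trailing slash: prepare_input now appends "data_dev.pkl" directly
    action = SeqLabelAction(test_args)    # the action itself only reads "use_cuda"
    tester = POSTester(test_args, action)
    tester.test(model)                    # mode switching, batching and the forward pass are delegated to the shared action
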
diff --git a/fastNLP/core/trainer.py b/fastNLP/core/trainer.py
index 3da6b061..df848d7d 100644
--- a/fastNLP/core/trainer.py
+++ b/fastNLP/core/trainer.py
@@ -18,17 +18,15 @@ class BaseTrainer(Action):
     Trainer receives a model and data, and then performs training.
 
     Subclasses must implement the following abstract methods:
-        - prepare_input
-        - mode
         - define_optimizer
-        - data_forward
         - grad_backward
         - get_loss
     """
 
-    def __init__(self, train_args):
+    def __init__(self, train_args, action):
         """
         :param train_args: dict of (key, value), or dict-like object. key is str.
+        :param action: an Action object that wraps most operations shared by Trainer, Tester, and Inference.
 
         The base trainer requires the following keys:
         - epochs: int, the number of epochs in training
@@ -37,6 +35,7 @@ class BaseTrainer(Action):
         - pickle_path: str, the path to pickle files for pre-processing
         """
         super(BaseTrainer, self).__init__()
+        self.action = action
         self.n_epochs = train_args["epochs"]
         self.batch_size = train_args["batch_size"]
         self.pickle_path = train_args["pickle_path"]
@@ -72,14 +71,14 @@ class BaseTrainer(Action):
         else:
             self.model = network
 
-        data_train, data_dev, data_test, embedding = self.prepare_input(self.pickle_path)
+        data_train = self.prepare_input(self.pickle_path)
 
         # define tester over dev data
         # TODO: more flexible
-        valid_args = {"save_output": True, "validate_in_training": True, "save_dev_input": True,
+        default_valid_args = {"save_output": True, "validate_in_training": True, "save_dev_input": True,
                       "save_loss": True, "batch_size": self.batch_size, "pickle_path": self.pickle_path,
                       "use_cuda": self.use_cuda}
-        validator = POSTester(valid_args)
+        validator = POSTester(default_valid_args, self.action)
 
         # main training epochs
         iterations = len(data_train) // self.batch_size
@@ -88,14 +87,14 @@ class BaseTrainer(Action):
         for epoch in range(1, self.n_epochs + 1):
 
             # turn on network training mode; define optimizer; prepare batch iterator
-            self.mode(test=False)
-            self.iterator = iter(Batchifier(BucketSampler(data_train), self.batch_size, drop_last=True))
+            self.action.mode(self.model, test=False)
+            iterator = iter(Batchifier(RandomSampler(data_train), self.batch_size, drop_last=True))
 
             # training iterations in one epoch
             for step in range(iterations):
-                batch_x, batch_y = self.make_batch(data_train)
+                batch_x, batch_y = self.action.make_batch(iterator, data_train)
 
-                prediction = self.data_forward(network, batch_x)
+                prediction = self.action.data_forward(network, batch_x)
 
                 loss = self.get_loss(prediction, batch_y)
                 self.grad_backward(loss)
@@ -105,8 +104,6 @@ class BaseTrainer(Action):
                     print("[epoch {} step {}] train loss={:.2f}".format(epoch, step, loss.data))
 
             if self.validate:
-                if data_dev is None:
-                    raise RuntimeError("No validation data provided.")
                 validator.test(network)
 
                 if self.save_best_dev and self.best_eval_result(validator):
@@ -118,19 +115,13 @@ class BaseTrainer(Action):
 
         # finish training
 
-    def prepare_input(self, data_path):
-        data_train = _pickle.load(open(data_path + "data_train.pkl", "rb"))
-        data_dev = _pickle.load(open(data_path + "data_dev.pkl", "rb"))
-        data_test = _pickle.load(open(data_path + "data_test.pkl", "rb"))
-        embedding = _pickle.load(open(data_path + "embedding.pkl", "rb"))
-        return data_train, data_dev, data_test, embedding
-
-    def mode(self, test=False):
+    def prepare_input(self, pickle_path):
         """
-        Tell the network to be trained or not.
-        :param test: bool
+        This is reserved for task-specific processing.
+        :param pickle_path: str, the path to the directory holding the pickled data
+        :return: list, the training data loaded from "data_train.pkl"
         """
-        raise NotImplementedError
+        return _pickle.load(open(pickle_path + "/data_train.pkl", "rb"))
 
     def define_optimizer(self):
         """
@@ -146,17 +137,6 @@ class BaseTrainer(Action):
         """
         raise NotImplementedError
 
-    def data_forward(self, network, x):
-        """
-        Forward pass of the data.
-        :param network: a model
-        :param x: input feature matrix and label vector
-        :return: output by the models
-
-        For PyTorch, just do "network(*x)"
-        """
-        raise NotImplementedError
-
     def grad_backward(self, loss):
         """
         Compute gradient with link rules.
@@ -187,50 +167,6 @@ class BaseTrainer(Action):
         """
         raise NotImplementedError
 
-    def make_batch(self, data, output_length=True):
-        """
-        1. Perform batching from data and produce a batch of training data.
-        2. Add padding.
-        :param data: list. Each entry is a sample, which is also a list of features and label(s).
-            E.g.
-                [
-                    [[word_11, word_12, word_13], [label_11. label_12]],  # sample 1
-                    [[word_21, word_22, word_23], [label_21. label_22]],  # sample 2
-                    ...
-                ]
-        :return (batch_x, seq_len): tuple of two elements, if output_length is true.
-                    batch_x: list. Each entry is a list of features of a sample. [batch_size, max_len]
-                    seq_len: list. The length of the pre-padded sequence, if output_length is True.
-                 batch_y: list. Each entry is a list of labels of a sample. [batch_size, num_labels]
-
-                 return batch_x and batch_y, if output_length is False
-        """
-        indices = next(self.iterator)
-        batch = [data[idx] for idx in indices]
-        batch_x = [sample[0] for sample in batch]
-        batch_y = [sample[1] for sample in batch]
-        batch_x_pad = self.pad(batch_x)
-        batch_y_pad = self.pad(batch_y)
-        if output_length:
-            seq_len = [len(x) for x in batch_x]
-            return (batch_x_pad, seq_len), batch_y_pad
-        else:
-            return batch_x_pad, batch_y_pad
-
-    @staticmethod
-    def pad(batch, fill=0):
-        """
-        Pad a batch of samples to maximum length.
-        :param batch: list of list
-        :param fill: word index to pad, default 0.
-        :return: a padded batch
-        """
-        max_length = max([len(x) for x in batch])
-        for idx, sample in enumerate(batch):
-            if len(sample) < max_length:
-                batch[idx] = sample + ([fill] * (max_length - len(sample)))
-        return batch
-
     def best_eval_result(self, validator):
         """
         :param validator: a Tester instance
@@ -287,48 +223,14 @@ class POSTrainer(BaseTrainer):
     Trainer for Sequence Modeling
     """
 
-    def __init__(self, train_args):
-        super(POSTrainer, self).__init__(train_args)
+    def __init__(self, train_args, action):
+        super(POSTrainer, self).__init__(train_args, action)
         self.vocab_size = train_args["vocab_size"]
         self.num_classes = train_args["num_classes"]
         self.max_len = None
         self.mask = None
         self.best_accuracy = 0.0
 
-    def prepare_input(self, data_path):
-
-        data_train = _pickle.load(open(data_path + "/data_train.pkl", "rb"))
-        data_dev = _pickle.load(open(data_path + "/data_train.pkl", "rb"))
-        return data_train, data_dev, 0, 1
-
-    def data_forward(self, network, inputs):
-        """
-        :param network: the PyTorch model
-        :param inputs: list of list, [batch_size, max_len],
-                        or tuple of (batch_x, seq_len), batch_x == [batch_size, max_len]
-        :return y: [batch_size, max_len, tag_size]
-        """
-        # unpack the returned value from make_batch
-        if isinstance(inputs, tuple):
-            x = inputs[0]
-            self.seq_len = inputs[1]
-        else:
-            x = inputs
-        x = torch.Tensor(x).long()
-        if torch.cuda.is_available() and self.use_cuda:
-            x = x.cuda()
-        self.batch_size = x.size(0)
-        self.max_len = x.size(1)
-
-        y = network(x)
-        return y
-
-    def mode(self, test=False):
-        if test:
-            self.model.eval()
-        else:
-            self.model.train()
-
     def define_optimizer(self):
         self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01, momentum=0.9)
 
@@ -349,14 +251,13 @@ class POSTrainer(BaseTrainer):
         truth = torch.Tensor(truth)
         if torch.cuda.is_available() and self.use_cuda:
             truth = truth.cuda()
-        assert truth.shape == (self.batch_size, self.max_len)
+        assert truth.shape == (self.batch_size, self.action.max_len)
         if self.loss_func is None:
             if hasattr(self.model, "loss"):
                 self.loss_func = self.model.loss
             else:
                 self.define_loss()
-        loss = self.loss_func(predict, truth, self.seq_len)
-        # print("loss={:.2f}".format(loss.data))
+        loss = self.loss_func(predict, truth, self.action.seq_len)
         return loss
 
     def best_eval_result(self, validator):
@@ -367,36 +268,6 @@ class POSTrainer(BaseTrainer):
         else:
             return False
 
-    def make_batch(self, data, output_length=True):
-        """
-        1. Perform batching from data and produce a batch of training data.
-        2. Add padding.
-        :param data: list. Each entry is a sample, which is also a list of features and label(s).
-            E.g.
-                [
-                    [[word_11, word_12, word_13], [label_11. label_12]],  # sample 1
-                    [[word_21, word_22, word_23], [label_21. label_22]],  # sample 2
-                    ...
-                ]
-        :return (batch_x, seq_len): tuple of two elements, if output_length is true.
-                    batch_x: list. Each entry is a list of features of a sample. [batch_size, max_len]
-                    seq_len: list. The length of the pre-padded sequence, if output_length is True.
-                 batch_y: list. Each entry is a list of labels of a sample. [batch_size, num_labels]
-
-                 return batch_x and batch_y, if output_length is False
-        """
-        indices = next(self.iterator)
-        batch = [data[idx] for idx in indices]
-        batch_x = [sample[0] for sample in batch]
-        batch_y = [sample[1] for sample in batch]
-        batch_x_pad = self.pad(batch_x)
-        batch_y_pad = self.pad(batch_y)
-        if output_length:
-            seq_len = [len(x) for x in batch_x]
-            return (batch_x_pad, seq_len), batch_y_pad
-        else:
-            return batch_x_pad, batch_y_pad
-
 
 class LanguageModelTrainer(BaseTrainer):
     """
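
With batching, padding, mode switching and the forward pass moved onto the shared Action, a task-specific trainer only has to supply the optimization pieces. A rough sketch of the reduced subclass contract (MyTaskTrainer and its method bodies are illustrative, not the POSTrainer implementation):

    import torch
    from fastNLP.core.trainer import BaseTrainer

    class MyTaskTrainer(BaseTrainer):
        def define_optimizer(self):
            self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01, momentum=0.9)

        def grad_backward(self, loss):
            self.model.zero_grad()
            loss.backward()

        def get_loss(self, predict, truth):
            # assumes self.loss_func was set up beforehand, e.g. by define_loss() as in POSTrainer
            return self.loss_func(predict, truth)
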
diff --git a/test/test_seq_labeling.py b/test/seq_labeling.py
similarity index 91%
rename from test/test_seq_labeling.py
rename to test/seq_labeling.py
index 2bc2a899..10b9f986 100644
--- a/test/test_seq_labeling.py
+++ b/test/seq_labeling.py
@@ -2,6 +2,7 @@
 import sys
 
 sys.path.append("..")
+from fastNLP.core.action import SeqLabelAction
 from fastNLP.loader.config_loader import ConfigLoader, ConfigSection
 from fastNLP.core.trainer import POSTrainer
 from fastNLP.loader.dataset_loader import POSDatasetLoader, BaseLoader
@@ -57,7 +58,7 @@ def infer():
     print("Inference finished!")
 
 
-def train_test():
+def train_and_test():
     # Config Loader
     train_args = ConfigSection()
     ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS": train_args})
@@ -67,12 +68,14 @@
     train_data = pos_loader.load_lines()
 
     # Preprocessor
-    p = POSPreprocess(train_data, pickle_path)
+    p = POSPreprocess(train_data, pickle_path, train_dev_split=0.5)
     train_args["vocab_size"] = p.vocab_size
     train_args["num_classes"] = p.num_classes
 
+    action = SeqLabelAction(train_args)
+
     # Trainer
-    trainer = POSTrainer(train_args)
+    trainer = POSTrainer(train_args, action)
 
     # Model
     model = SeqLabeling(train_args)
@@ -100,7 +103,7 @@
     ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS_test": test_args})
 
     # Tester
-    tester = POSTester(test_args)
+    tester = POSTester(test_args, action)
 
     # Start testing
     tester.test(model)
@@ -111,5 +114,5 @@
 
 
 if __name__ == "__main__":
-    train_test()
-    # infer()
+    train_and_test()
+
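
Note that the script builds a single SeqLabelAction and passes the same instance to both POSTrainer and POSTester: data_forward caches seq_len, batch_size and max_len on the action, and get_loss/evaluate read them back through self.action. A self-contained sketch of that contract, using a dummy embedding in place of a SeqLabeling model:

    import torch
    from fastNLP.core.action import Action, SeqLabelAction, RandomSampler, Batchifier

    data = [[[1, 2, 3], [1, 0, 1]], [[4, 5], [0, 1, 0]]]
    action = SeqLabelAction({"use_cuda": False})
    iterator = iter(Batchifier(RandomSampler(data), 2, drop_last=True))
    batch_x, batch_y = Action.make_batch(iterator, data)      # batch_x == (padded features, seq_len)
    dummy_net = torch.nn.Embedding(10, 4)                     # stands in for a SeqLabeling model
    prediction = action.data_forward(dummy_net, batch_x)      # caches action.seq_len / batch_size / max_len
    print(action.seq_len, action.batch_size, action.max_len)  # what Trainer.get_loss and Tester.evaluate read back
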