From d7a8217132a7c67db01fda20ee629321839750b6 Mon Sep 17 00:00:00 2001 From: FengZiYjun Date: Fri, 6 Jul 2018 23:14:58 +0800 Subject: [PATCH 1/7] finished POSTrainer --- fastNLP/action/trainer.py | 39 ++++++++++- fastNLP/loader/dataset_loader.py | 1 - fastNLP/loader/preprocess.py | 66 +++++++++++-------- fastNLP/models/base_model.py | 14 ++-- fastNLP/models/sequencce_modeling.py | 98 ++++++++++++++++++++++++++++ fastNLP/modules/prototype/example.py | 48 ++++++++++++-- fastNLP/modules/utils.py | 6 ++ test/test_POS_pipeline.py | 29 ++++++++ 8 files changed, 259 insertions(+), 42 deletions(-) create mode 100644 fastNLP/models/sequencce_modeling.py create mode 100644 test/test_POS_pipeline.py diff --git a/fastNLP/action/trainer.py b/fastNLP/action/trainer.py index 437ab7d2..ac7138e5 100644 --- a/fastNLP/action/trainer.py +++ b/fastNLP/action/trainer.py @@ -7,6 +7,7 @@ import torch from fastNLP.action.action import Action from fastNLP.action.action import RandomSampler, Batchifier from fastNLP.action.tester import Tester +from fastNLP.modules.utils import seq_mask class BaseTrainer(Action): @@ -28,6 +29,7 @@ class BaseTrainer(Action): training parameters """ super(BaseTrainer, self).__init__() + self.train_args = train_args self.n_epochs = train_args.epochs self.validate = train_args.validate self.batch_size = train_args.batch_size @@ -163,8 +165,8 @@ class BaseTrainer(Action): :param data: list. Each entry is a sample, which is also a list of features and label(s). E.g. [ - [[feature_1, feature_2, feature_3], [label_1. label_2]], # sample 1 - [[feature_1, feature_2, feature_3], [label_1. label_2]], # sample 2 + [[word_11, word_12, word_13], [label_11. label_12]], # sample 1 + [[word_21, word_22, word_23], [label_21. label_22]], # sample 2 ... ] :return batch_x: list. Each entry is a list of features of a sample. 
@@ -313,6 +315,39 @@ class WordSegTrainer(BaseTrainer): self.optimizer.step() +class POSTrainer(BaseTrainer): + def __init__(self, train_args): + super(POSTrainer, self).__init__(train_args) + self.vocab_size = train_args.vocab_size + self.num_classes = train_args.num_classes + self.max_len = None + self.mask = None + self.batch_x = None + + def prepare_input(self, data_path): + """ + To do: Load pkl files of train/dev/test and embedding + """ + data_train = _pickle.load(open(data_path + "data_train.pkl", "rb")) + data_dev = _pickle.load(open(data_path + "data_dev.pkl", "rb")) + return data_train, data_dev + + def data_forward(self, network, x): + seq_len = [len(seq) for seq in x] + x = torch.LongTensor(x) + self.batch_size = x.size(0) + self.max_len = x.size(1) + self.mask = seq_mask(seq_len, self.max_len) + x = network(x) + self.batch_x = x + return x + + def get_loss(self, predict, truth): + truth = torch.LongTensor(truth) + loss, prediction = self.loss_func(self.batch_x, predict, self.mask, self.batch_size, self.max_len) + return loss + + if __name__ == "__name__": train_args = BaseTrainer.TrainConfig(epochs=1, validate=False, batch_size=3, pickle_path="./") trainer = BaseTrainer(train_args) diff --git a/fastNLP/loader/dataset_loader.py b/fastNLP/loader/dataset_loader.py index 7132eb3b..284be715 100644 --- a/fastNLP/loader/dataset_loader.py +++ b/fastNLP/loader/dataset_loader.py @@ -15,7 +15,6 @@ class POSDatasetLoader(DatasetLoader): def __init__(self, data_name, data_path): super(POSDatasetLoader, self).__init__(data_name, data_path) - #self.data_set = self.load() def load(self): assert os.path.exists(self.data_path) diff --git a/fastNLP/loader/preprocess.py b/fastNLP/loader/preprocess.py index b8d88c35..8b9c6d88 100644 --- a/fastNLP/loader/preprocess.py +++ b/fastNLP/loader/preprocess.py @@ -46,19 +46,17 @@ class BasePreprocess(object): class POSPreprocess(BasePreprocess): - """ This class is used to preprocess the pos datasets. - In these datasets, each line is divided by '\t' - The first Col is the vocabulary. - The second Col is the labels. + In these datasets, each line is divided by '\t' + while the first Col is the word and the second + Col is the label. Different sentences are divided by an empty line. e.g: Tom label1 and label2 Jerry label1 . label3 - Hello label4 world label5 !
label3 @@ -71,11 +69,13 @@ class POSPreprocess(BasePreprocess): super(POSPreprocess, self).__init__(data, pickle_path) self.word_dict = None self.label_dict = None + self.data = data + self.pickle_path = pickle_path self.build_dict() self.word2id() - self.id2word() + self.vocab_size = self.id2word() self.class2id() - self.id2class() + self.num_classes = self.id2class() self.embedding() self.data_train() self.data_dev() @@ -87,7 +87,8 @@ class POSPreprocess(BasePreprocess): DEFAULT_RESERVED_LABEL[2]: 4} self.label_dict = {} for w in self.data: - if len(w) == 0: + w = w.strip() + if len(w) <= 1: continue word = w.split('\t') @@ -95,10 +96,11 @@ class POSPreprocess(BasePreprocess): index = len(self.word_dict) self.word_dict[word[0]] = index - for label in word[1: ]: - if label not in self.label_dict: - index = len(self.label_dict) - self.label_dict[label] = index + # for label in word[1: ]: + label = word[1] + if label not in self.label_dict: + index = len(self.label_dict) + self.label_dict[label] = index def pickle_exist(self, pickle_name): """ @@ -107,7 +109,7 @@ class POSPreprocess(BasePreprocess): """ if not os.path.exists(self.pickle_path): os.makedirs(self.pickle_path) - file_name = self.pickle_path + pickle_name + file_name = os.path.join(self.pickle_path, pickle_name) if os.path.exists(file_name): return True else: @@ -118,42 +120,48 @@ class POSPreprocess(BasePreprocess): return # nothing will be done if word2id.pkl exists - file_name = self.pickle_path + "word2id.pkl" - with open(file_name, "wb", encoding='utf-8') as f: + file_name = os.path.join(self.pickle_path, "word2id.pkl") + with open(file_name, "wb") as f: _pickle.dump(self.word_dict, f) def id2word(self): if self.pickle_exist("id2word.pkl"): - return + file_name = os.path.join(self.pickle_path, "id2word.pkl") + id2word_dict = _pickle.load(open(file_name, "rb")) + return len(id2word_dict) # nothing will be done if id2word.pkl exists id2word_dict = {} for word in self.word_dict: id2word_dict[self.word_dict[word]] = word - file_name = self.pickle_path + "id2word.pkl" - with open(file_name, "wb", encoding='utf-8') as f: + file_name = os.path.join(self.pickle_path, "id2word.pkl") + with open(file_name, "wb") as f: _pickle.dump(id2word_dict, f) + return len(id2word_dict) def class2id(self): if self.pickle_exist("class2id.pkl"): return # nothing will be done if class2id.pkl exists - file_name = self.pickle_path + "class2id.pkl" - with open(file_name, "wb", encoding='utf-8') as f: + file_name = os.path.join(self.pickle_path, "class2id.pkl") + with open(file_name, "wb") as f: _pickle.dump(self.label_dict, f) def id2class(self): if self.pickle_exist("id2class.pkl"): - return + file_name = os.path.join(self.pickle_path, "id2class.pkl") + id2class_dict = _pickle.load(open(file_name, "rb")) + return len(id2class_dict) # nothing will be done if id2class.pkl exists id2class_dict = {} for label in self.label_dict: id2class_dict[self.label_dict[label]] = label - file_name = self.pickle_path + "id2class.pkl" - with open(file_name, "wb", encoding='utf-8') as f: + file_name = os.path.join(self.pickle_path, "id2class.pkl") + with open(file_name, "wb") as f: _pickle.dump(id2class_dict, f) + return len(id2class_dict) def embedding(self): if self.pickle_exist("embedding.pkl"): @@ -168,22 +176,26 @@ class POSPreprocess(BasePreprocess): data_train = [] sentence = [] for w in self.data: - if len(w) == 0: + w = w.strip() + if len(w) <= 1: wid = [] lid = [] for i in range(len(sentence)): + # if sentence[i][0]=="": + # print("") 
wid.append(self.word_dict[sentence[i][0]]) lid.append(self.label_dict[sentence[i][1]]) data_train.append((wid, lid)) sentence = [] + continue sentence.append(w.split('\t')) - file_name = self.pickle_path + "data_train.pkl" - with open(file_name, "wb", encoding='utf-8') as f: + file_name = os.path.join(self.pickle_path, "data_train.pkl") + with open(file_name, "wb") as f: _pickle.dump(data_train, f) def data_dev(self): pass def data_test(self): - pass + pass \ No newline at end of file diff --git a/fastNLP/models/base_model.py b/fastNLP/models/base_model.py index 9249e2e3..54e28687 100644 --- a/fastNLP/models/base_model.py +++ b/fastNLP/models/base_model.py @@ -4,9 +4,9 @@ import torch class BaseModel(torch.nn.Module): """Base PyTorch model for all models. Three network modules presented: - - embedding module + - encoder module - aggregation module - - output module + - decoder module Subclasses must implement these three modules with "components". """ @@ -15,21 +15,20 @@ class BaseModel(torch.nn.Module): def forward(self, *inputs): x = self.encode(*inputs) - x = self.aggregation(x) - x = self.output(x) + x = self.aggregate(x) + x = self.decode(x) return x def encode(self, x): raise NotImplementedError - def aggregation(self, x): + def aggregate(self, x): raise NotImplementedError - def output(self, x): + def decode(self, x): raise NotImplementedError - class Vocabulary(object): """A look-up table that allows you to access `Lexeme` objects. The `Vocab` instance also provides access to the `StringStore`, and owns underlying @@ -93,3 +92,4 @@ class Token(object): self.doc = doc self.token = doc[offset] self.i = offset + diff --git a/fastNLP/models/sequencce_modeling.py b/fastNLP/models/sequencce_modeling.py new file mode 100644 index 00000000..af6931e4 --- /dev/null +++ b/fastNLP/models/sequencce_modeling.py @@ -0,0 +1,98 @@ +import torch +import torch.nn as nn +from torch.nn import functional as F + +from fastNLP.models.base_model import BaseModel +from fastNLP.modules.CRF import ContionalRandomField + + +class SeqLabeling(BaseModel): + """ + PyTorch Network for sequence labeling + """ + + def __init__(self, hidden_dim, + rnn_num_layerd, + num_classes, + vocab_size, + word_emb_dim=100, + init_emb=None, + rnn_mode="gru", + bi_direction=False, + dropout=0.5, + use_crf=True): + super(SeqLabeling, self).__init__() + + self.Emb = nn.Embedding(vocab_size, word_emb_dim) + if init_emb: + self.Emb.weight = nn.Parameter(init_emb) + + self.num_classes = num_classes + self.input_dim = word_emb_dim + self.layers = rnn_num_layerd + self.hidden_dim = hidden_dim + self.bi_direction = bi_direction + self.dropout = dropout + self.mode = rnn_mode + + if self.mode == "lstm": + self.rnn = nn.LSTM(self.input_dim, self.hidden_dim, self.layers, batch_first=True, + bidirectional=self.bi_direction, dropout=self.dropout) + elif self.mode == "gru": + self.rnn = nn.GRU(self.input_dim, self.hidden_dim, self.layers, batch_first=True, + bidirectional=self.bi_direction, dropout=self.dropout) + elif self.mode == "rnn": + self.rnn = nn.RNN(self.input_dim, self.hidden_dim, self.layers, batch_first=True, + bidirectional=self.bi_direction, dropout=self.dropout) + else: + raise Exception + if bi_direction: + self.linear = nn.Linear(self.hidden_dim * 2, self.num_classes) + else: + self.linear = nn.Linear(self.hidden_dim, self.num_classes) + self.use_crf = use_crf + if self.use_crf: + self.crf = ContionalRandomField(num_classes) + + def forward(self, x): + + x = self.embedding(x) + x, hidden = self.encode(x) + x = 
self.aggregation(x) x = self.output(x) return x + + def embedding(self, x): + return self.Emb(x) + + def encode(self, x): + return self.rnn(x) + + def aggregate(self, x): + return x + + def decode(self, x): + x = self.linear(x) + return x + + def loss(self, x, y, mask, batch_size, max_len): + """ + Negative log likelihood loss. + :param x: + :param y: + :param seq_len: + :return loss: + prediction: + """ + if self.use_crf: + total_loss = self.crf(x, y, mask) + tag_seq = self.crf.viterbi_decode(x, mask) + else: + # error + loss_function = nn.NLLLoss(ignore_index=0, size_average=False) + x = x.view(batch_size * max_len, -1) + score = F.log_softmax(x) + total_loss = loss_function(score, y.view(batch_size * max_len)) + _, tag_seq = torch.max(score) + tag_seq = tag_seq.view(batch_size, max_len) + return torch.mean(total_loss), tag_seq diff --git a/fastNLP/modules/prototype/example.py b/fastNLP/modules/prototype/example.py index a19898c6..d23a0ec2 100644 --- a/fastNLP/modules/prototype/example.py +++ b/fastNLP/modules/prototype/example.py @@ -1,12 +1,13 @@ -import torch -import torch.nn as nn -import encoder +import time + import aggregation +import dataloader import embedding +import encoder import predict +import torch +import torch.nn as nn import torch.optim as optim -import time -import dataloader WORD_NUM = 357361 WORD_SIZE = 100 @@ -16,6 +17,30 @@ R = 10 MLP_HIDDEN = 2000 CLASSES_NUM = 5 +from fastNLP.models.base_model import BaseModel +from fastNLP.action.trainer import BaseTrainer + + +class MyNet(BaseModel): + def __init__(self): + super(MyNet, self).__init__() + self.embedding = embedding.Lookuptable(WORD_NUM, WORD_SIZE) + self.encoder = encoder.Lstm(WORD_SIZE, HIDDEN_SIZE, 1, 0.5, True) + self.aggregation = aggregation.Selfattention(2 * HIDDEN_SIZE, D_A, R) + self.predict = predict.MLP(R * HIDDEN_SIZE * 2, MLP_HIDDEN, CLASSES_NUM) + self.penalty = None + + def encode(self, x): + return self.encoder(self.embedding(x)) + + def aggregate(self, x): + x, self.penalty = self.aggregation(x) + return x + + def decode(self, x): + return [self.predict(x), self.penalty] + + class Net(nn.Module): """ A model for sentiment analysis using lstm and self-attention @@ -34,6 +59,19 @@ class Net(nn.Module): x = self.predict(x) return x, penalty + +class MyTrainer(BaseTrainer): + def __init__(self, args): + super(MyTrainer, self).__init__(args) + self.optimizer = None + + def define_optimizer(self): + self.optimizer = optim.SGD(self.model.parameters(), lr=0.01, momentum=0.9) + + def define_loss(self): + self.loss_func = nn.CrossEntropyLoss() + + def train(model_dict=None, using_cuda=True, learning_rate=0.06,\ momentum=0.3, batch_size=32, epochs=5, coef=1.0, interval=10): """ diff --git a/fastNLP/modules/utils.py b/fastNLP/modules/utils.py index 15afe883..a6b31a20 100644 --- a/fastNLP/modules/utils.py +++ b/fastNLP/modules/utils.py @@ -7,3 +7,9 @@ def mask_softmax(matrix, mask): else: raise NotImplementedError return result + + +def seq_mask(seq_len, max_len): + mask = [torch.ge(torch.LongTensor(seq_len), i + 1) for i in range(max_len)] + mask = torch.stack(mask, 1) + return mask diff --git a/test/test_POS_pipeline.py b/test/test_POS_pipeline.py new file mode 100644 index 00000000..db4232e7 --- /dev/null +++ b/test/test_POS_pipeline.py @@ -0,0 +1,29 @@ +from fastNLP.action.trainer import POSTrainer +from fastNLP.loader.dataset_loader import POSDatasetLoader +from fastNLP.loader.preprocess import POSPreprocess +from fastNLP.models.sequencce_modeling import SeqLabeling + +data_name = "people" +data_path =
"data/people.txt" +pickle_path = "data" + +if __name__ == "__main__": + # Data Loader + pos = POSDatasetLoader(data_name, data_path) + train_data = pos.load_lines() + + # Preprocessor + p = POSPreprocess(train_data, pickle_path) + vocab_size = p.vocab_size + num_classes = p.num_classes + + # Trainer + train_args = POSTrainer.TrainConfig(epochs=20, batch_size=1, num_classes=num_classes, + vocab_size=vocab_size, pickle_path=pickle_path) + trainer = POSTrainer(train_args) + + # Model + model = SeqLabeling(100, 1, num_classes, vocab_size, bi_direction=True) + + # Start training. + trainer.train(model) From cca276b8c09add219bbbcaa8cbf78d786358cea3 Mon Sep 17 00:00:00 2001 From: FengZiYjun Date: Sat, 7 Jul 2018 16:57:57 +0800 Subject: [PATCH 2/7] - optimize package calling from test files - add people.txt in data_for_tests - To do: incorrect CRF param in POS_pipeline --- fastNLP/action/trainer.py | 35 ++++++++++++--- fastNLP/loader/dataset_loader.py | 2 +- fastNLP/models/sequencce_modeling.py | 9 +++- test/data_for_tests/people.txt | 67 ++++++++++++++++++++++++++++ test/test_POS_pipeline.py | 11 +++-- 5 files changed, 111 insertions(+), 13 deletions(-) create mode 100644 test/data_for_tests/people.txt diff --git a/fastNLP/action/trainer.py b/fastNLP/action/trainer.py index ac7138e5..94a704f9 100644 --- a/fastNLP/action/trainer.py +++ b/fastNLP/action/trainer.py @@ -31,12 +31,13 @@ class BaseTrainer(Action): super(BaseTrainer, self).__init__() self.train_args = train_args self.n_epochs = train_args.epochs - self.validate = train_args.validate + # self.validate = train_args.validate self.batch_size = train_args.batch_size self.pickle_path = train_args.pickle_path self.model = None self.iterator = None self.loss_func = None + self.optimizer = None def train(self, network): """General training loop. @@ -316,6 +317,8 @@ class WordSegTrainer(BaseTrainer): class POSTrainer(BaseTrainer): + TrainConfig = namedtuple("config", ["epochs", "batch_size", "pickle_path", "num_classes", "vocab_size"]) + def __init__(self, train_args): super(POSTrainer, self).__init__(train_args) self.vocab_size = train_args.vocab_size @@ -328,9 +331,9 @@ class POSTrainer(BaseTrainer): """ To do: Load pkl files of train/dev/test and embedding """ - data_train = _pickle.load(open(data_path + "data_train.pkl", "rb")) - data_dev = _pickle.load(open(data_path + "data_dev.pkl", "rb")) - return data_train, data_dev + data_train = _pickle.load(open(data_path + "/data_train.pkl", "rb")) + data_dev = _pickle.load(open(data_path + "/data_train.pkl", "rb")) + return data_train, data_dev, 0, 1 def data_forward(self, network, x): seq_len = [len(seq) for seq in x] @@ -342,10 +345,28 @@ class POSTrainer(BaseTrainer): self.batch_x = x return x + def mode(self, test=False): + if test: + self.model.eval() + else: + self.model.train() + + def define_optimizer(self): + self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01, momentum=0.9) + def get_loss(self, predict, truth): - truth = torch.LongTensor(truth) - loss, prediction = self.loss_func(self.batch_x, predict, self.mask, self.batch_size, self.max_len) - return loss + """ + Compute loss given prediction and ground truth. 
+ :param predict: prediction label vector + :param truth: ground truth label vector + :return: a scalar + """ + if self.loss_func is None: + if hasattr(self.model, "loss"): + self.loss_func = self.model.loss + else: + self.define_loss() + return self.loss_func(self.batch_x, predict, self.mask, self.batch_size, self.max_len) if __name__ == "__name__": diff --git a/fastNLP/loader/dataset_loader.py b/fastNLP/loader/dataset_loader.py index 284be715..d57a48db 100644 --- a/fastNLP/loader/dataset_loader.py +++ b/fastNLP/loader/dataset_loader.py @@ -23,7 +23,7 @@ class POSDatasetLoader(DatasetLoader): return line def load_lines(self): - assert os.path.exists(self.data_path) + assert (os.path.exists(self.data_path)) with open(self.data_path, "r", encoding="utf-8") as f: lines = f.readlines() return lines diff --git a/fastNLP/models/sequencce_modeling.py b/fastNLP/models/sequencce_modeling.py index af6931e4..ba96d4b6 100644 --- a/fastNLP/models/sequencce_modeling.py +++ b/fastNLP/models/sequencce_modeling.py @@ -58,8 +58,8 @@ class SeqLabeling(BaseModel): x = self.embedding(x) x, hidden = self.encode(x) - x = self.aggregation(x) - x = self.output(x) + x = self.aggregate(x) + x = self.decode(x) return x def embedding(self, x): @@ -84,6 +84,11 @@ class SeqLabeling(BaseModel): :return loss: prediction: """ + x = x.float() + y = y.long() + mask = mask.byte() + print(x.shape, y.shape, mask.shape) + if self.use_crf: total_loss = self.crf(x, y, mask) tag_seq = self.crf.viterbi_decode(x, mask) diff --git a/test/data_for_tests/people.txt b/test/data_for_tests/people.txt new file mode 100644 index 00000000..f34c85cb --- /dev/null +++ b/test/data_for_tests/people.txt @@ -0,0 +1,67 @@ +迈 B-v +向 E-v +充 B-v +满 E-v +希 B-n +望 E-n +的 S-u +新 S-a +世 B-n +纪 E-n +— B-w +— E-w +一 B-t +九 M-t +九 M-t +八 M-t +年 E-t +新 B-t +年 E-t +讲 B-n +话 E-n +( S-w +附 S-v +图 B-n +片 E-n +1 S-m +张 S-q +) S-w + +中 B-nt +共 M-nt +中 M-nt +央 E-nt +总 B-n +书 M-n +记 E-n +、 S-w +国 B-n +家 E-n +主 B-n +席 E-n +江 B-nr +泽 M-nr +民 E-nr + +( S-w +一 B-t +九 M-t +九 M-t +七 M-t +年 E-t +十 B-t +二 M-t +月 E-t +三 B-t +十 M-t +一 M-t +日 E-t +) S-w + +1 B-t +2 M-t +月 E-t +3 B-t +1 M-t +日 E-t +, S-w \ No newline at end of file diff --git a/test/test_POS_pipeline.py b/test/test_POS_pipeline.py index db4232e7..66e418c6 100644 --- a/test/test_POS_pipeline.py +++ b/test/test_POS_pipeline.py @@ -1,11 +1,15 @@ +import sys + +sys.path.append("..") + from fastNLP.action.trainer import POSTrainer from fastNLP.loader.dataset_loader import POSDatasetLoader from fastNLP.loader.preprocess import POSPreprocess from fastNLP.models.sequencce_modeling import SeqLabeling -data_name = "people" -data_path = "data/people.txt" -pickle_path = "data" +data_name = "people.txt" +data_path = "data_for_tests/people.txt" +pickle_path = "data_for_tests" if __name__ == "__main__": # Data Loader @@ -27,3 +31,4 @@ if __name__ == "__main__": # Start training. trainer.train(model) + From 4c9c791304d29f4289c87d6fe6b67ff40e5bbdc0 Mon Sep 17 00:00:00 2001 From: FengZiYjun Date: Sat, 7 Jul 2018 16:59:59 +0800 Subject: [PATCH 3/7] cancel restriction for base model --- fastNLP/models/base_model.py | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/fastNLP/models/base_model.py b/fastNLP/models/base_model.py index 54e28687..24dfdb85 100644 --- a/fastNLP/models/base_model.py +++ b/fastNLP/models/base_model.py @@ -3,31 +3,12 @@ import torch class BaseModel(torch.nn.Module): """Base PyTorch model for all models. 
- Three network modules presented: - - encoder module - - aggregation module - - decoder module - Subclasses must implement these three modules with "components". + To do: add some useful common features """ def __init__(self): super(BaseModel, self).__init__() - def forward(self, *inputs): - x = self.encode(*inputs) - x = self.aggregate(x) - x = self.decode(x) - return x - - def encode(self, x): - raise NotImplementedError - - def aggregate(self, x): - raise NotImplementedError - - def decode(self, x): - raise NotImplementedError - class Vocabulary(object): """A look-up table that allows you to access `Lexeme` objects. The `Vocab` From 83c032df5d661e0860695d80c37296480555b833 Mon Sep 17 00:00:00 2001 From: FengZiYjun Date: Tue, 10 Jul 2018 18:51:42 +0800 Subject: [PATCH 4/7] fix bug in CRF comments; optimize PyTorch type conversion. --- fastNLP/action/trainer.py | 23 ++++++++------- fastNLP/models/sequencce_modeling.py | 42 ++++++++++++---------------- fastNLP/modules/CRF.py | 4 +-- 3 files changed, 33 insertions(+), 36 deletions(-) diff --git a/fastNLP/action/trainer.py b/fastNLP/action/trainer.py index 94a704f9..1f22ef28 100644 --- a/fastNLP/action/trainer.py +++ b/fastNLP/action/trainer.py @@ -170,8 +170,8 @@ class BaseTrainer(Action): [[word_21, word_22, word_23], [label_21. label_22]], # sample 2 ... ] - :return batch_x: list. Each entry is a list of features of a sample. - batch_y: list. Each entry is a list of labels of a sample. + :return batch_x: list. Each entry is a list of features of a sample. [batch_size, max_len] + batch_y: list. Each entry is a list of labels of a sample. [batch_size, num_labels] """ if self.iterator is None: self.iterator = iter(Batchifier(RandomSampler(data), batch_size, drop_last=True)) @@ -325,7 +325,6 @@ class POSTrainer(BaseTrainer): self.num_classes = train_args.num_classes self.max_len = None self.mask = None - self.batch_x = None def prepare_input(self, data_path): """ @@ -336,14 +335,18 @@ class POSTrainer(BaseTrainer): return data_train, data_dev, 0, 1 def data_forward(self, network, x): + """ + :param network: the PyTorch model + :param x: list of list, [batch_size, max_len] + :return y: [batch_size, num_classes] + """ seq_len = [len(seq) for seq in x] - x = torch.LongTensor(x) + x = torch.Tensor(x).long() self.batch_size = x.size(0) self.max_len = x.size(1) self.mask = seq_mask(seq_len, self.max_len) - x = network(x) - self.batch_x = x - return x + y = network(x) + return y def mode(self, test=False): if test: @@ -357,8 +360,8 @@ class POSTrainer(BaseTrainer): def get_loss(self, predict, truth): """ Compute loss given prediction and ground truth. 
- :param predict: prediction label vector + :param predict: prediction label vector, [batch_size, num_classes] - :param truth: ground truth label vector + :param truth: ground truth label vector, [batch_size, max_len] :return: a scalar """ if self.loss_func is None: @@ -366,7 +369,7 @@ class POSTrainer(BaseTrainer): self.loss_func = self.model.loss else: self.define_loss() - return self.loss_func(self.batch_x, predict, self.mask, self.batch_size, self.max_len) + return self.loss_func(predict, truth, self.mask, self.batch_size, self.max_len) if __name__ == "__name__": diff --git a/fastNLP/models/sequencce_modeling.py b/fastNLP/models/sequencce_modeling.py index ba96d4b6..96f09f80 100644 --- a/fastNLP/models/sequencce_modeling.py +++ b/fastNLP/models/sequencce_modeling.py @@ -12,7 +12,7 @@ class SeqLabeling(BaseModel): """ def __init__(self, hidden_dim, - rnn_num_layerd, + rnn_num_layer, num_classes, vocab_size, word_emb_dim=100, @@ -29,7 +29,7 @@ class SeqLabeling(BaseModel): self.num_classes = num_classes self.input_dim = word_emb_dim - self.layers = rnn_num_layerd + self.layers = rnn_num_layer self.hidden_dim = hidden_dim self.bi_direction = bi_direction self.dropout = dropout @@ -55,32 +55,26 @@ class SeqLabeling(BaseModel): self.crf = ContionalRandomField(num_classes) def forward(self, x): - - x = self.embedding(x) - x, hidden = self.encode(x) - x = self.aggregate(x) - x = self.decode(x) - return x - - def embedding(self, x): - return self.Emb(x) - - def encode(self, x): - return self.rnn(x) - - def aggregate(self, x): - return x - - def decode(self, x): - x = self.linear(x) - return x + """ + :param x: LongTensor, [batch_size, max_len] + :return y: [batch_size, max_len, num_classes] + """ + x = self.Emb(x) + # [batch_size, max_len, word_emb_dim] + x, hidden = self.rnn(x) + # [batch_size, max_len, hidden_size * direction] + y = self.linear(x) + # [batch_size, max_len, num_classes] + return y def loss(self, x, y, mask, batch_size, max_len): """ Negative log likelihood loss. - :param x: - :param y: - :param seq_len: + :param x: FloatTensor, [batch_size, max_len, tag_size] + :param y: LongTensor, [batch_size, max_len] + :param mask: ByteTensor, [batch_size, max_len] + :param batch_size: int + :param max_len: int :return loss: prediction: """ diff --git a/fastNLP/modules/CRF.py b/fastNLP/modules/CRF.py index 6361b93d..96c84dca 100644 --- a/fastNLP/modules/CRF.py +++ b/fastNLP/modules/CRF.py @@ -82,7 +82,7 @@ class ContionalRandomField(nn.Module): def _glod_score(self, feats, tags, masks): """ Compute the score for the gold path. - :param feats: FloatTensor, batch_size x tag_size x tag_size + :param feats: FloatTensor, batch_size x max_len x tag_size :param tags: LongTensor, batch_size x max_len :param masks: ByteTensor, batch_size x max_len :return:FloatTensor, batch_size @@ -118,7 +118,7 @@ class ContionalRandomField(nn.Module): def forward(self, feats, tags, masks): """ Calculate the neg log likelihood - :param feats:FloatTensor, batch_size x tag_size x tag_size + :param feats:FloatTensor, batch_size x max_len x tag_size :param tags:LongTensor, batch_size x max_len :param masks:ByteTensor batch_size x max_len :return:FloatTensor, batch_size From c98d5924b585a7bfdc127e017d8cc2ff444d7e25 Mon Sep 17 00:00:00 2001 From: FengZiYjun Date: Tue, 10 Jul 2018 20:46:35 +0800 Subject: [PATCH 5/7] sequence labeling ready to Train!
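The namedtuple-based TrainConfig is replaced by a plain dict of training arguments, so every trainer subclass can read exactly the keys it needs. A minimal sketch of the intended call site, mirroring test/test_POS_pipeline.py as updated in this patch (the argument values are illustrative only):

    train_args = {"epochs": 20, "batch_size": 1, "pickle_path": pickle_path,
                  "validate": False, "num_classes": num_classes, "vocab_size": vocab_size}
    trainer = POSTrainer(train_args)
    model = SeqLabeling(100, 1, num_classes, vocab_size, bi_direction=True)
    trainer.train(model)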
--- fastNLP/action/trainer.py | 47 ++++++++++++------- ...encce_modeling.py => sequence_modeling.py} | 2 +- requirements.txt | 4 +- test/test_POS_pipeline.py | 9 ++-- 4 files changed, 39 insertions(+), 23 deletions(-) rename fastNLP/models/{sequencce_modeling.py => sequence_modeling.py} (98%) diff --git a/fastNLP/action/trainer.py b/fastNLP/action/trainer.py index 1f22ef28..6f51435a 100644 --- a/fastNLP/action/trainer.py +++ b/fastNLP/action/trainer.py @@ -1,5 +1,4 @@ import _pickle -from collections import namedtuple import numpy as np import torch @@ -22,18 +21,22 @@ class BaseTrainer(Action): - grad_backward - get_loss """ - TrainConfig = namedtuple("config", ["epochs", "validate", "batch_size", "pickle_path"]) def __init__(self, train_args): """ - training parameters + :param train_args: dict of (key, value) + + The base trainer requires the following keys: + - epochs: int, the number of epochs in training + - validate: bool, whether or not to validate on dev set + - batch_size: int + - pickle_path: str, the path to pickle files for pre-processing """ super(BaseTrainer, self).__init__() - self.train_args = train_args - self.n_epochs = train_args.epochs - # self.validate = train_args.validate - self.batch_size = train_args.batch_size - self.pickle_path = train_args.pickle_path + self.n_epochs = train_args["epochs"] + self.validate = train_args["validate"] + self.batch_size = train_args["batch_size"] + self.pickle_path = train_args["pickle_path"] self.model = None self.iterator = None self.loss_func = None @@ -66,8 +69,9 @@ class BaseTrainer(Action): for epoch in range(self.n_epochs): self.mode(test=False) - self.define_optimizer() + self.iterator = iter(Batchifier(RandomSampler(data_train), self.batch_size, drop_last=True)) + for step in range(iterations): batch_x, batch_y = self.batchify(self.batch_size, data_train) @@ -173,8 +177,6 @@ class BaseTrainer(Action): :return batch_x: list. Each entry is a list of features of a sample. [batch_size, max_len] batch_y: list. Each entry is a list of labels of a sample. [batch_size, num_labels] """ - if self.iterator is None: - self.iterator = iter(Batchifier(RandomSampler(data), batch_size, drop_last=True)) indices = next(self.iterator) batch = [data[idx] for idx in indices] batch_x = [sample[0] for sample in batch] @@ -304,6 +306,7 @@ class WordSegTrainer(BaseTrainer): self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01, momentum=0.85) def get_loss(self, predict, truth): + truth = torch.Tensor(truth) self._loss = torch.nn.CrossEntropyLoss(predict, truth) return self._loss @@ -316,13 +319,16 @@ class WordSegTrainer(BaseTrainer): self.optimizer.step() + class POSTrainer(BaseTrainer): - TrainConfig = namedtuple("config", ["epochs", "batch_size", "pickle_path", "num_classes", "vocab_size"]) + """ + Trainer for Sequence Modeling + """ def __init__(self, train_args): super(POSTrainer, self).__init__(train_args) - self.vocab_size = train_args.vocab_size - self.num_classes = train_args.num_classes + self.vocab_size = train_args["vocab_size"] + self.num_classes = train_args["num_classes"] self.max_len = None self.mask = None @@ -357,6 +363,13 @@ class POSTrainer(BaseTrainer): def define_optimizer(self): self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01, momentum=0.9) + def grad_backward(self, loss): + self.model.zero_grad() + loss.backward() + + def update(self): + self.optimizer.step() + def get_loss(self, predict, truth): """ Compute loss given prediction and ground truth. 
@@ -364,16 +377,18 @@ class POSTrainer(BaseTrainer): :param truth: ground truth label vector, [batch_size, max_len] :return: a scalar """ + truth = torch.Tensor(truth) if self.loss_func is None: if hasattr(self.model, "loss"): self.loss_func = self.model.loss else: self.define_loss() - return self.loss_func(predict, truth, self.mask, self.batch_size, self.max_len) + loss, prediction = self.loss_func(predict, truth, self.mask, self.batch_size, self.max_len) + return loss if __name__ == "__name__": - train_args = BaseTrainer.TrainConfig(epochs=1, validate=False, batch_size=3, pickle_path="./") + train_args = {"epochs": 1, "validate": False, "batch_size": 3, "pickle_path": "./"} trainer = BaseTrainer(train_args) data_train = [[[1, 2, 3, 4], [0]] * 10] + [[[1, 3, 5, 2], [1]] * 10] trainer.batchify(batch_size=3, data=data_train) diff --git a/fastNLP/models/sequencce_modeling.py b/fastNLP/models/sequence_modeling.py similarity index 98% rename from fastNLP/models/sequencce_modeling.py rename to fastNLP/models/sequence_modeling.py index 96f09f80..80d13cf3 100644 --- a/fastNLP/models/sequencce_modeling.py +++ b/fastNLP/models/sequence_modeling.py @@ -81,7 +81,7 @@ class SeqLabeling(BaseModel): x = x.float() y = y.long() mask = mask.byte() - print(x.shape, y.shape, mask.shape) + # print(x.shape, y.shape, mask.shape) if self.use_crf: total_loss = self.crf(x, y, mask) diff --git a/requirements.txt b/requirements.txt index 0fc94538..d961dd92 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -numpy==1.14.2 +numpy>=1.14.2 torch==0.4.0 -torchvision==0.1.8 +torchvision>=0.1.8 diff --git a/test/test_POS_pipeline.py b/test/test_POS_pipeline.py index 66e418c6..c6e3fd83 100644 --- a/test/test_POS_pipeline.py +++ b/test/test_POS_pipeline.py @@ -5,7 +5,7 @@ sys.path.append("..") from fastNLP.action.trainer import POSTrainer from fastNLP.loader.dataset_loader import POSDatasetLoader from fastNLP.loader.preprocess import POSPreprocess -from fastNLP.models.sequencce_modeling import SeqLabeling +from fastNLP.models.sequence_modeling import SeqLabeling data_name = "people.txt" data_path = "data_for_tests/people.txt" @@ -22,13 +22,14 @@ if __name__ == "__main__": num_classes = p.num_classes # Trainer - train_args = POSTrainer.TrainConfig(epochs=20, batch_size=1, num_classes=num_classes, - vocab_size=vocab_size, pickle_path=pickle_path) + train_args = {"epochs": 20, "batch_size": 1, "num_classes": num_classes, + "vocab_size": vocab_size, "pickle_path": pickle_path, "validate": False} trainer = POSTrainer(train_args) # Model model = SeqLabeling(100, 1, num_classes, vocab_size, bi_direction=True) - # Start training. 
+ # Start training trainer.train(model) + print("Training finished!") From a73087e913ea6c7faad53a104983f87b0a8b2bef Mon Sep 17 00:00:00 2001 From: FengZiYjun Date: Tue, 10 Jul 2018 22:00:24 +0800 Subject: [PATCH 6/7] refactor Tester; Tester + Trainer for seq modeling work --- fastNLP/action/tester.py | 161 +++++++++++++++++++++++++------------- fastNLP/action/trainer.py | 36 +++++---- test/test_POS_pipeline.py | 2 +- 3 files changed, 125 insertions(+), 74 deletions(-) diff --git a/fastNLP/action/tester.py b/fastNLP/action/tester.py index 7f660bb0..2a71cf4d 100644 --- a/fastNLP/action/tester.py +++ b/fastNLP/action/tester.py @@ -1,87 +1,136 @@ -from collections import namedtuple +import _pickle -import numpy as np +import torch from fastNLP.action.action import Action +from fastNLP.action.action import RandomSampler, Batchifier +from fastNLP.modules.utils import seq_mask -class Tester(Action): +class BaseTester(Action): """docstring for Tester""" - TestConfig = namedtuple("config", ["validate_in_training", "save_dev_input", "save_output", - "save_loss", "batch_size"]) - def __init__(self, test_args): """ :param test_args: named tuple """ - super(Tester, self).__init__() - self.validate_in_training = test_args.validate_in_training - self.save_dev_input = test_args.save_dev_input + super(BaseTester, self).__init__() + self.validate_in_training = test_args["validate_in_training"] self.valid_x = None self.valid_y = None - self.save_output = test_args.save_output + self.save_output = test_args["save_output"] self.output = None - self.save_loss = test_args.save_loss + self.save_loss = test_args["save_loss"] self.mean_loss = None - self.batch_size = test_args.batch_size - - def test(self, network, data): - print("testing") - network.mode(test=True) # turn on the testing mode - if self.save_dev_input: - if self.valid_x is None: - valid_x, valid_y = network.prepare_input(data) - self.valid_x = valid_x - self.valid_y = valid_y - else: - valid_x = self.valid_x - valid_y = self.valid_y - else: - valid_x, valid_y = network.prepare_input(data) + self.batch_size = test_args["batch_size"] + self.pickle_path = test_args["pickle_path"] + self.iterator = None - # split into batches by self.batch_size - iterations, test_batch_generator = self.batchify(self.batch_size, valid_x, valid_y) + def test(self, network): + # print("--------------testing----------------") + self.mode(network, test=True) - batch_output = list() - loss_history = list() - # turn on the testing mode of the network - network.mode(test=True) + dev_data = self.prepare_input(self.pickle_path) - for step in range(iterations): - batch_x, batch_y = test_batch_generator.__next__() + self.iterator = iter(Batchifier(RandomSampler(dev_data), self.batch_size, drop_last=True)) - # forward pass from test input to predicted output - prediction = network.data_forward(batch_x) + batch_output = list() + eval_history = list() + num_iter = len(dev_data) // self.batch_size + + for step in range(num_iter): + batch_x, batch_y = self.batchify(dev_data) - loss = network.get_loss(prediction, batch_y) + prediction = self.data_forward(network, batch_x) + eval_results = self.evaluate(prediction, batch_y) if self.save_output: - batch_output.append(prediction.data) + batch_output.append(prediction) if self.save_loss: - loss_history.append(loss) - self.log(self.make_log(step, loss)) - - if self.save_loss: - self.mean_loss = np.mean(np.array(loss_history)) - if self.save_output: - self.output = self.make_output(batch_output) + eval_history.append(eval_results) - @property 
- def loss(self): - return self.mean_loss + def prepare_input(self, data_path): + data_dev = _pickle.load(open(data_path + "/data_train.pkl", "rb")) + return data_dev - @property - def result(self): - return self.output + def batchify(self, data): + """ + 1. Perform batching from data and produce a batch of training data. + 2. Add padding. + :param data: list. Each entry is a sample, which is also a list of features and label(s). + E.g. + [ + [[word_11, word_12, word_13], [label_11, label_12]], # sample 1 + [[word_21, word_22, word_23], [label_21, label_22]], # sample 2 + ... + ] + :return batch_x: list. Each entry is a list of features of a sample. [batch_size, max_len] + batch_y: list. Each entry is a list of labels of a sample. [batch_size, num_labels] + """ + indices = next(self.iterator) + batch = [data[idx] for idx in indices] + batch_x = [sample[0] for sample in batch] + batch_y = [sample[1] for sample in batch] + batch_x = self.pad(batch_x) + return batch_x, batch_y @staticmethod - def make_output(batch_outputs): - # construct full prediction with batch outputs - return np.concatenate(batch_outputs, axis=0) + def pad(batch, fill=0): + """ + Pad a batch of samples to maximum length. + :param batch: list of list + :param fill: word index to pad, default 0. + :return: a padded batch + """ + max_length = max([len(x) for x in batch]) + for idx, sample in enumerate(batch): + if len(sample) < max_length: + batch[idx] = sample + [fill] * (max_length - len(sample)) + return batch - def load_config(self, args): + def data_forward(self, network, data): raise NotImplementedError - def load_dataset(self, args): + def evaluate(self, predict, truth): raise NotImplementedError + + @property + def matrices(self): + raise NotImplementedError + + def mode(self, model, test=True): + """To do: combine this function with Trainer""" + if test: + model.eval() + else: + model.train() + + +class POSTester(BaseTester): + """ + Tester for sequence labeling. + """ + + def __init__(self, test_args): + super(POSTester, self).__init__(test_args) + self.max_len = None + self.mask = None + + def data_forward(self, network, x): + """To Do: combine with Trainer + + :param network: the PyTorch model + :param x: list of list, [batch_size, max_len] + :return y: [batch_size, num_classes] + """ + seq_len = [len(seq) for seq in x] + x = torch.Tensor(x).long() + self.batch_size = x.size(0) + self.max_len = x.size(1) + self.mask = seq_mask(seq_len, self.max_len) + y = network(x) + return y + + def evaluate(self, predict, truth): + """To Do: """ + return 0 diff --git a/fastNLP/action/trainer.py b/fastNLP/action/trainer.py index 94a704f9..034b46ca 100644 --- a/fastNLP/action/trainer.py +++ b/fastNLP/action/trainer.py @@ -5,7 +5,7 @@ import torch from fastNLP.action.action import Action from fastNLP.action.action import RandomSampler, Batchifier -from fastNLP.action.tester import Tester +from fastNLP.action.tester import POSTester from fastNLP.modules.utils import seq_mask @@ -43,7 +43,7 @@ class BaseTrainer(Action): self.optimizer = None def train(self, network): - """General training loop. + """General Training Steps :param network: a model The method is framework independent. @@ -57,23 +57,27 @@ class BaseTrainer(Action): - update Subclasses must implement these methods with a specific framework.
""" + # prepare model and data self.model = network data_train, data_dev, data_test, embedding = self.prepare_input(self.pickle_path) - test_args = Tester.TestConfig(save_output=True, validate_in_training=True, - save_dev_input=True, save_loss=True, batch_size=self.batch_size) - evaluator = Tester(test_args) + # define tester over dev data + valid_args = {"save_output": True, "validate_in_training": True, "save_dev_input": True, + "save_loss": True, "batch_size": self.batch_size, "pickle_path": self.pickle_path} + validator = POSTester(valid_args) - best_loss = 1e10 + # main training epochs iterations = len(data_train) // self.batch_size - for epoch in range(self.n_epochs): + + # turn on network training mode; define optimizer; prepare batch iterator self.mode(test=False) self.define_optimizer() self.iterator = iter(Batchifier(RandomSampler(data_train), self.batch_size, drop_last=True)) + # training iterations in one epoch for step in range(iterations): - batch_x, batch_y = self.batchify(self.batch_size, data_train) + batch_x, batch_y = self.batchify(data_train) prediction = self.data_forward(network, batch_x) @@ -84,9 +88,7 @@ class BaseTrainer(Action): if self.validate: if data_dev is None: raise RuntimeError("No validation data provided.") - evaluator.test(network, data_dev) - if evaluator.loss < best_loss: - best_loss = evaluator.loss + validator.test(network) # finish training @@ -162,11 +164,10 @@ class BaseTrainer(Action): """ raise NotImplementedError - def batchify(self, batch_size, data): + def batchify(self, data): """ 1. Perform batching from data and produce a batch of training data. 2. Add padding. - :param batch_size: int, the size of a batch :param data: list. Each entry is a sample, which is also a list of features and label(s). E.g. 
[ @@ -200,7 +201,9 @@ class BaseTrainer(Action): class ToyTrainer(BaseTrainer): - """A simple trainer for a PyTorch model.""" + """ + deprecated + """ def __init__(self, train_args): super(ToyTrainer, self).__init__(train_args) @@ -235,7 +238,7 @@ class ToyTrainer(BaseTrainer): class WordSegTrainer(BaseTrainer): """ - reserve for changes + deprecated """ def __init__(self, train_args): @@ -319,7 +322,6 @@ class WordSegTrainer(BaseTrainer): self.optimizer.step() - class POSTrainer(BaseTrainer): """ Trainer for Sequence Modeling @@ -391,4 +393,4 @@ if __name__ == "__name__": train_args = {"epochs": 1, "validate": False, "batch_size": 3, "pickle_path": "./"} trainer = BaseTrainer(train_args) data_train = [[[1, 2, 3, 4], [0]] * 10] + [[[1, 3, 5, 2], [1]] * 10] - trainer.batchify(batch_size=3, data=data_train) + trainer.batchify(data=data_train) diff --git a/test/test_POS_pipeline.py b/test/test_POS_pipeline.py index c6e3fd83..af22e3b9 100644 --- a/test/test_POS_pipeline.py +++ b/test/test_POS_pipeline.py @@ -23,7 +23,7 @@ if __name__ == "__main__": # Trainer train_args = {"epochs": 20, "batch_size": 1, "num_classes": num_classes, - "vocab_size": vocab_size, "pickle_path": pickle_path, "validate": False} + "vocab_size": vocab_size, "pickle_path": pickle_path, "validate": True} trainer = POSTrainer(train_args) # Model From 7514be6f30cafe6e7e16a1477ad61019985796f0 Mon Sep 17 00:00:00 2001 From: FengZiYjun Date: Wed, 11 Jul 2018 21:51:35 +0800 Subject: [PATCH 7/7] - add validation loss into trainer.train - restructure: move reproduction outside - add evaluate in tester --- fastNLP/action/tester.py | 36 +++++++++++++----- fastNLP/action/trainer.py | 2 + fastNLP/reproduction/__init__.py | 0 .../CNN-sentence_classification/.gitignore | 0 .../CNN-sentence_classification/README.md | 0 .../CNN-sentence_classification/__init__.py | 0 .../CNN-sentence_classification/dataset.py | 0 .../CNN-sentence_classification/model.py | 0 .../rt-polaritydata/rt-polarity.neg | 0 .../rt-polaritydata/rt-polarity.pos | 0 .../CNN-sentence_classification/train.py | 0 .../Char-aware_NLM/LICENSE | 0 .../Char-aware_NLM/README.md | 0 .../Char-aware_NLM/__init__.py | 0 .../Char-aware_NLM/model.py | 0 .../Char-aware_NLM/test.py | 0 .../Char-aware_NLM/test.txt | 0 .../Char-aware_NLM/train.py | 0 .../Char-aware_NLM/train.txt | 0 .../Char-aware_NLM/utilities.py | 0 .../Char-aware_NLM/valid.txt | 0 .../HAN-document_classification/README.md | 0 .../HAN-document_classification/__init__.py | 0 .../data/test_samples.pkl | Bin .../data/train_samples.pkl | Bin .../data/yelp.word2vec | Bin .../HAN-document_classification/evaluate.py | 0 .../HAN-document_classification/model.py | 0 .../HAN-document_classification/preprocess.py | 0 .../HAN-document_classification/train.py | 0 30 files changed, 29 insertions(+), 9 deletions(-) delete mode 100644 fastNLP/reproduction/__init__.py rename {fastNLP/reproduction => reproduction}/CNN-sentence_classification/.gitignore (100%) rename {fastNLP/reproduction => reproduction}/CNN-sentence_classification/README.md (100%) rename {fastNLP/reproduction => reproduction}/CNN-sentence_classification/__init__.py (100%) rename {fastNLP/reproduction => reproduction}/CNN-sentence_classification/dataset.py (100%) rename {fastNLP/reproduction => reproduction}/CNN-sentence_classification/model.py (100%) rename {fastNLP/reproduction => reproduction}/CNN-sentence_classification/rt-polaritydata/rt-polarity.neg (100%) rename {fastNLP/reproduction => reproduction}/CNN-sentence_classification/rt-polaritydata/rt-polarity.pos 
(100%) rename {fastNLP/reproduction => reproduction}/CNN-sentence_classification/train.py (100%) rename {fastNLP/reproduction => reproduction}/Char-aware_NLM/LICENSE (100%) rename {fastNLP/reproduction => reproduction}/Char-aware_NLM/README.md (100%) rename {fastNLP/reproduction => reproduction}/Char-aware_NLM/__init__.py (100%) rename {fastNLP/reproduction => reproduction}/Char-aware_NLM/model.py (100%) rename {fastNLP/reproduction => reproduction}/Char-aware_NLM/test.py (100%) rename {fastNLP/reproduction => reproduction}/Char-aware_NLM/test.txt (100%) rename {fastNLP/reproduction => reproduction}/Char-aware_NLM/train.py (100%) rename {fastNLP/reproduction => reproduction}/Char-aware_NLM/train.txt (100%) rename {fastNLP/reproduction => reproduction}/Char-aware_NLM/utilities.py (100%) rename {fastNLP/reproduction => reproduction}/Char-aware_NLM/valid.txt (100%) rename {fastNLP/reproduction => reproduction}/HAN-document_classification/README.md (100%) rename {fastNLP/reproduction => reproduction}/HAN-document_classification/__init__.py (100%) rename {fastNLP/reproduction => reproduction}/HAN-document_classification/data/test_samples.pkl (100%) rename {fastNLP/reproduction => reproduction}/HAN-document_classification/data/train_samples.pkl (100%) rename {fastNLP/reproduction => reproduction}/HAN-document_classification/data/yelp.word2vec (100%) rename {fastNLP/reproduction => reproduction}/HAN-document_classification/evaluate.py (100%) rename {fastNLP/reproduction => reproduction}/HAN-document_classification/model.py (100%) rename {fastNLP/reproduction => reproduction}/HAN-document_classification/preprocess.py (100%) rename {fastNLP/reproduction => reproduction}/HAN-document_classification/train.py (100%) diff --git a/fastNLP/action/tester.py b/fastNLP/action/tester.py index 2a71cf4d..9d32ec40 100644 --- a/fastNLP/action/tester.py +++ b/fastNLP/action/tester.py @@ -1,5 +1,6 @@ import _pickle +import numpy as np import torch from fastNLP.action.action import Action @@ -16,8 +17,7 @@ class BaseTester(Action): """ super(BaseTester, self).__init__() self.validate_in_training = test_args["validate_in_training"] - self.valid_x = None - self.valid_y = None + self.save_dev_data = None self.save_output = test_args["save_output"] self.output = None self.save_loss = test_args["save_loss"] @@ -26,8 +26,14 @@ class BaseTester(Action): self.pickle_path = test_args["pickle_path"] self.iterator = None + self.model = None + self.eval_history = [] + def test(self, network): # print("--------------testing----------------") + self.model = network + + # turn on the testing mode; clean up the history self.mode(network, test=True) dev_data = self.prepare_input(self.pickle_path) @@ -35,7 +41,6 @@ class BaseTester(Action): self.iterator = iter(Batchifier(RandomSampler(dev_data), self.batch_size, drop_last=True)) batch_output = list() - eval_history = list() num_iter = len(dev_data) // self.batch_size for step in range(num_iter): @@ -47,11 +52,18 @@ class BaseTester(Action): if self.save_output: batch_output.append(prediction) if self.save_loss: - eval_history.append(eval_results) + self.eval_history.append(eval_results) def prepare_input(self, data_path): - data_dev = _pickle.load(open(data_path + "/data_train.pkl", "rb")) - return data_dev + """ + Save the dev data once it is loaded. Can return directly next time. + :param data_path: str, the path to the pickle data for dev + :return save_dev_data: list. Each entry is a sample, which is also a list of features and label(s). 
+ """ + if self.save_dev_data is None: + data_dev = _pickle.load(open(data_path + "/data_train.pkl", "rb")) + self.save_dev_data = data_dev + return self.save_dev_data def batchify(self, data): """ @@ -99,11 +111,12 @@ class BaseTester(Action): raise NotImplementedError def mode(self, model, test=True): - """To do: combine this function with Trainer""" + """To do: combine this function with Trainer ?? """ if test: model.eval() else: model.train() + self.eval_history.clear() class POSTester(BaseTester): @@ -115,6 +128,7 @@ class POSTester(BaseTester): super(POSTester, self).__init__(test_args) self.max_len = None self.mask = None + self.batch_result = None def data_forward(self, network, x): """To Do: combine with Trainer @@ -132,5 +146,9 @@ class POSTester(BaseTester): return y def evaluate(self, predict, truth): - """To Do: """ - return 0 + truth = torch.Tensor(truth) + loss, prediction = self.model.loss(predict, truth, self.mask, self.batch_size, self.max_len) + return loss.data + + def matrices(self): + return np.mean(self.eval_history) diff --git a/fastNLP/action/trainer.py b/fastNLP/action/trainer.py index 034b46ca..0ab9fee7 100644 --- a/fastNLP/action/trainer.py +++ b/fastNLP/action/trainer.py @@ -89,6 +89,7 @@ class BaseTrainer(Action): if data_dev is None: raise RuntimeError("No validation data provided.") validator.test(network) + print("[epoch {}] dev loss={:.2f}".format(epoch, validator.matrices())) # finish training @@ -386,6 +387,7 @@ class POSTrainer(BaseTrainer): else: self.define_loss() loss, prediction = self.loss_func(predict, truth, self.mask, self.batch_size, self.max_len) + # print("loss={:.2f}".format(loss.data)) return loss diff --git a/fastNLP/reproduction/__init__.py b/fastNLP/reproduction/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/fastNLP/reproduction/CNN-sentence_classification/.gitignore b/reproduction/CNN-sentence_classification/.gitignore similarity index 100% rename from fastNLP/reproduction/CNN-sentence_classification/.gitignore rename to reproduction/CNN-sentence_classification/.gitignore diff --git a/fastNLP/reproduction/CNN-sentence_classification/README.md b/reproduction/CNN-sentence_classification/README.md similarity index 100% rename from fastNLP/reproduction/CNN-sentence_classification/README.md rename to reproduction/CNN-sentence_classification/README.md diff --git a/fastNLP/reproduction/CNN-sentence_classification/__init__.py b/reproduction/CNN-sentence_classification/__init__.py similarity index 100% rename from fastNLP/reproduction/CNN-sentence_classification/__init__.py rename to reproduction/CNN-sentence_classification/__init__.py diff --git a/fastNLP/reproduction/CNN-sentence_classification/dataset.py b/reproduction/CNN-sentence_classification/dataset.py similarity index 100% rename from fastNLP/reproduction/CNN-sentence_classification/dataset.py rename to reproduction/CNN-sentence_classification/dataset.py diff --git a/fastNLP/reproduction/CNN-sentence_classification/model.py b/reproduction/CNN-sentence_classification/model.py similarity index 100% rename from fastNLP/reproduction/CNN-sentence_classification/model.py rename to reproduction/CNN-sentence_classification/model.py diff --git a/fastNLP/reproduction/CNN-sentence_classification/rt-polaritydata/rt-polarity.neg b/reproduction/CNN-sentence_classification/rt-polaritydata/rt-polarity.neg similarity index 100% rename from fastNLP/reproduction/CNN-sentence_classification/rt-polaritydata/rt-polarity.neg rename to 
reproduction/CNN-sentence_classification/rt-polaritydata/rt-polarity.neg diff --git a/fastNLP/reproduction/CNN-sentence_classification/rt-polaritydata/rt-polarity.pos b/reproduction/CNN-sentence_classification/rt-polaritydata/rt-polarity.pos similarity index 100% rename from fastNLP/reproduction/CNN-sentence_classification/rt-polaritydata/rt-polarity.pos rename to reproduction/CNN-sentence_classification/rt-polaritydata/rt-polarity.pos diff --git a/fastNLP/reproduction/CNN-sentence_classification/train.py b/reproduction/CNN-sentence_classification/train.py similarity index 100% rename from fastNLP/reproduction/CNN-sentence_classification/train.py rename to reproduction/CNN-sentence_classification/train.py diff --git a/fastNLP/reproduction/Char-aware_NLM/LICENSE b/reproduction/Char-aware_NLM/LICENSE similarity index 100% rename from fastNLP/reproduction/Char-aware_NLM/LICENSE rename to reproduction/Char-aware_NLM/LICENSE diff --git a/fastNLP/reproduction/Char-aware_NLM/README.md b/reproduction/Char-aware_NLM/README.md similarity index 100% rename from fastNLP/reproduction/Char-aware_NLM/README.md rename to reproduction/Char-aware_NLM/README.md diff --git a/fastNLP/reproduction/Char-aware_NLM/__init__.py b/reproduction/Char-aware_NLM/__init__.py similarity index 100% rename from fastNLP/reproduction/Char-aware_NLM/__init__.py rename to reproduction/Char-aware_NLM/__init__.py diff --git a/fastNLP/reproduction/Char-aware_NLM/model.py b/reproduction/Char-aware_NLM/model.py similarity index 100% rename from fastNLP/reproduction/Char-aware_NLM/model.py rename to reproduction/Char-aware_NLM/model.py diff --git a/fastNLP/reproduction/Char-aware_NLM/test.py b/reproduction/Char-aware_NLM/test.py similarity index 100% rename from fastNLP/reproduction/Char-aware_NLM/test.py rename to reproduction/Char-aware_NLM/test.py diff --git a/fastNLP/reproduction/Char-aware_NLM/test.txt b/reproduction/Char-aware_NLM/test.txt similarity index 100% rename from fastNLP/reproduction/Char-aware_NLM/test.txt rename to reproduction/Char-aware_NLM/test.txt diff --git a/fastNLP/reproduction/Char-aware_NLM/train.py b/reproduction/Char-aware_NLM/train.py similarity index 100% rename from fastNLP/reproduction/Char-aware_NLM/train.py rename to reproduction/Char-aware_NLM/train.py diff --git a/fastNLP/reproduction/Char-aware_NLM/train.txt b/reproduction/Char-aware_NLM/train.txt similarity index 100% rename from fastNLP/reproduction/Char-aware_NLM/train.txt rename to reproduction/Char-aware_NLM/train.txt diff --git a/fastNLP/reproduction/Char-aware_NLM/utilities.py b/reproduction/Char-aware_NLM/utilities.py similarity index 100% rename from fastNLP/reproduction/Char-aware_NLM/utilities.py rename to reproduction/Char-aware_NLM/utilities.py diff --git a/fastNLP/reproduction/Char-aware_NLM/valid.txt b/reproduction/Char-aware_NLM/valid.txt similarity index 100% rename from fastNLP/reproduction/Char-aware_NLM/valid.txt rename to reproduction/Char-aware_NLM/valid.txt diff --git a/fastNLP/reproduction/HAN-document_classification/README.md b/reproduction/HAN-document_classification/README.md similarity index 100% rename from fastNLP/reproduction/HAN-document_classification/README.md rename to reproduction/HAN-document_classification/README.md diff --git a/fastNLP/reproduction/HAN-document_classification/__init__.py b/reproduction/HAN-document_classification/__init__.py similarity index 100% rename from fastNLP/reproduction/HAN-document_classification/__init__.py rename to reproduction/HAN-document_classification/__init__.py diff --git 
a/fastNLP/reproduction/HAN-document_classification/data/test_samples.pkl b/reproduction/HAN-document_classification/data/test_samples.pkl similarity index 100% rename from fastNLP/reproduction/HAN-document_classification/data/test_samples.pkl rename to reproduction/HAN-document_classification/data/test_samples.pkl diff --git a/fastNLP/reproduction/HAN-document_classification/data/train_samples.pkl b/reproduction/HAN-document_classification/data/train_samples.pkl similarity index 100% rename from fastNLP/reproduction/HAN-document_classification/data/train_samples.pkl rename to reproduction/HAN-document_classification/data/train_samples.pkl diff --git a/fastNLP/reproduction/HAN-document_classification/data/yelp.word2vec b/reproduction/HAN-document_classification/data/yelp.word2vec similarity index 100% rename from fastNLP/reproduction/HAN-document_classification/data/yelp.word2vec rename to reproduction/HAN-document_classification/data/yelp.word2vec diff --git a/fastNLP/reproduction/HAN-document_classification/evaluate.py b/reproduction/HAN-document_classification/evaluate.py similarity index 100% rename from fastNLP/reproduction/HAN-document_classification/evaluate.py rename to reproduction/HAN-document_classification/evaluate.py diff --git a/fastNLP/reproduction/HAN-document_classification/model.py b/reproduction/HAN-document_classification/model.py similarity index 100% rename from fastNLP/reproduction/HAN-document_classification/model.py rename to reproduction/HAN-document_classification/model.py diff --git a/fastNLP/reproduction/HAN-document_classification/preprocess.py b/reproduction/HAN-document_classification/preprocess.py similarity index 100% rename from fastNLP/reproduction/HAN-document_classification/preprocess.py rename to reproduction/HAN-document_classification/preprocess.py diff --git a/fastNLP/reproduction/HAN-document_classification/train.py b/reproduction/HAN-document_classification/train.py similarity index 100% rename from fastNLP/reproduction/HAN-document_classification/train.py rename to reproduction/HAN-document_classification/train.py
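The seq_mask utility added in the first patch is what ties the trainer, the tester and the CRF loss together: POSTrainer.data_forward and POSTester.data_forward both build the mask from the raw sequence lengths before calling the model. A short standalone check of its behaviour, repeating the function exactly as it appears in fastNLP/modules/utils.py (the sample lengths below are illustrative):

    import torch

    def seq_mask(seq_len, max_len):
        # column i is 1 for every sequence whose length is at least i + 1
        mask = [torch.ge(torch.LongTensor(seq_len), i + 1) for i in range(max_len)]
        mask = torch.stack(mask, 1)
        return mask

    # two sequences of lengths 3 and 2, padded out to max_len = 3
    print(seq_mask([3, 2], 3))
    # tensor([[1, 1, 1],
    #         [1, 1, 0]], dtype=torch.uint8)

The resulting ByteTensor is what SeqLabeling.loss converts with mask.byte() before handing it to ContionalRandomField.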