diff --git a/README.md b/README.md
index 8ad0f018..84d658fd 100644
--- a/README.md
+++ b/README.md
@@ -16,93 +16,19 @@ fastNLP is a modular Natural Language Processing system based on PyTorch, for fa
 - numpy>=1.14.2
 - torch==0.4.0
 - torchvision>=0.1.8
+- tensorboardX
 
 ## Resources
 
-- [Documentation](https://github.com/fastnlp/fastNLP)
+- [Documentation](https://fastnlp.readthedocs.io/en/latest/)
 - [Source Code](https://github.com/fastnlp/fastNLP)
 
-
-## Example
-
-### Basic Usage
-
-A typical fastNLP routine is composed of four phases: loading dataset, pre-processing data, constructing model and training model.
-```python
-from fastNLP.models.base_model import BaseModel
-from fastNLP.modules import encoder
-from fastNLP.modules import aggregation
-from fastNLP.modules import decoder
-
-from fastNLP.loader.dataset_loader import ClassDatasetLoader
-from fastNLP.loader.preprocess import ClassPreprocess
-from fastNLP.core.trainer import ClassificationTrainer
-from fastNLP.core.inference import ClassificationInfer
-
-
-class ClassificationModel(BaseModel):
-    """
-    Simple text classification model based on CNN.
-    """
-
-    def __init__(self, num_classes, vocab_size):
-        super(ClassificationModel, self).__init__()
-
-        self.emb = encoder.Embedding(nums=vocab_size, dims=300)
-        self.enc = encoder.Conv(
-            in_channels=300, out_channels=100, kernel_size=3)
-        self.agg = aggregation.MaxPool()
-        self.dec = decoder.MLP(100, num_classes=num_classes)
-
-    def forward(self, x):
-        x = self.emb(x)  # [N,L] -> [N,L,C]
-        x = self.enc(x)  # [N,L,C_in] -> [N,L,C_out]
-        x = self.agg(x)  # [N,L,C] -> [N,C]
-        x = self.dec(x)  # [N,C] -> [N, N_class]
-        return x
-
-
-data_dir = 'data'  # directory to save data and model
-train_path = 'test/data_for_tests/text_classify.txt'  # training set file
-
-# load dataset
-ds_loader = ClassDatasetLoader("train", train_path)
-data = ds_loader.load()
-
-# pre-process dataset
-pre = ClassPreprocess(data_dir)
-vocab_size, n_classes = pre.process(data, "data_train.pkl")
-
-# construct model
-model_args = {
-    'num_classes': n_classes,
-    'vocab_size': vocab_size
-}
-model = ClassificationModel(num_classes=n_classes, vocab_size=vocab_size)
-
-# train model
-train_args = {
-    "epochs": 20,
-    "batch_size": 50,
-    "pickle_path": data_dir,
-    "validate": False,
-    "save_best_dev": False,
-    "model_saved_path": None,
-    "use_cuda": True,
-    "learn_rate": 1e-3,
-    "momentum": 0.9}
-trainer = ClassificationTrainer(train_args)
-trainer.train(model)
-
-# predict using model
-seqs = [x[0] for x in data]
-infer = ClassificationInfer(data_dir)
-labels_pred = infer.predict(model, seqs)
-```
-
-
 ## Installation
+Run the following command to install the fastNLP package.
+```shell +pip install fastNLP +``` ### Cloning From GitHub @@ -122,20 +48,26 @@ conda install pytorch torchvision -c pytorch pip3 install torch torchvision ``` +### TensorboardX Installation + +```shell +pip3 install tensorboardX +``` ## Project Structure ``` FastNLP ├── docs -│   └── quick_tutorial.md ├── fastNLP -│   ├── action +│   ├── core │   │   ├── action.py -│   │   ├── inference.py │   │   ├── __init__.py +│   │   ├── loss.py │   │   ├── metrics.py │   │   ├── optimizer.py +│   │   ├── predictor.py +│   │   ├── preprocess.py │   │   ├── README.md │   │   ├── tester.py │   │   └── trainer.py @@ -147,71 +79,28 @@ FastNLP │   │   ├── dataset_loader.py │   │   ├── embed_loader.py │   │   ├── __init__.py -│   │   ├── model_loader.py -│   │   └── preprocess.py +│   │   └── model_loader.py │   ├── models -│   │   ├── base_model.py -│   │   ├── char_language_model.py -│   │   ├── cnn_text_classification.py -│   │   ├── __init__.py -│   │   └── sequence_modeling.py │   ├── modules │   │   ├── aggregation -│   │   │   ├── attention.py -│   │   │   ├── avg_pool.py -│   │   │   ├── __init__.py -│   │   │   ├── kmax_pool.py -│   │   │   ├── max_pool.py -│   │   │   └── self_attention.py │   │   ├── decoder -│   │   │   ├── CRF.py -│   │   │   └── __init__.py │   │   ├── encoder -│   │   │   ├── char_embedding.py -│   │   │   ├── conv_maxpool.py -│   │   │   ├── conv.py -│   │   │   ├── embedding.py -│   │   │   ├── __init__.py -│   │   │   ├── linear.py -│   │   │   ├── lstm.py -│   │   │   ├── masked_rnn.py -│   │   │   └── variational_rnn.py │   │   ├── __init__.py │   │   ├── interaction -│   │   │   └── __init__.py │   │   ├── other_modules.py │   │   └── utils.py │   └── saver -│   ├── base_saver.py -│   ├── __init__.py -│   ├── logger.py -│   └── model_saver.py ├── LICENSE ├── README.md ├── reproduction -│   ├── Char-aware_NLM -│   │   -│   ├── CNN-sentence_classification -│   │   -│   ├── HAN-document_classification -│   │   -│   └── LSTM+self_attention_sentiment_analysis -| ├── requirements.txt ├── setup.py └── test + ├── core ├── data_for_tests - │   ├── charlm.txt - │   ├── config - │   ├── cws_test - │   ├── cws_train - │   ├── people_infer.txt - │   └── people.txt - ├── test_charlm.py - ├── test_cws.py - ├── test_fastNLP.py - ├── test_loader.py - ├── test_seq_labeling.py - ├── test_tester.py - └── test_trainer.py + ├── __init__.py + ├── loader + ├── modules + └── readme_example.py + ``` diff --git a/docs/requirements.txt b/docs/requirements.txt index 3749c2cd..2809876b 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,3 +1,4 @@ -sphinx --e git://github.com/snide/sphinx_rtd_theme.git#egg=sphinx_rtd_theme -sphinxcontrib.katex \ No newline at end of file +numpy>=1.14.2 +http://download.pytorch.org/whl/cpu/torch-0.4.1-cp35-cp35m-linux_x86_64.whl +torchvision>=0.1.8 +sphinx-rtd-theme==0.4.1 \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index d4d73d2a..ff3639fa 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -42,6 +42,8 @@ release = '1.0' extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.viewcode', + 'sphinx.ext.autosummary', + ] # Add any paths that contain templates here, relative to this directory. 
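Since tensorboardX is now a hard dependency (the `fastNLP/core/trainer.py` changes further down log the training loss and the model graph through it), a quick sanity check after installing is to write a few dummy scalars and open them in TensorBoard. This snippet is an illustrative sketch, not part of the patch; the log directory name is arbitrary:

```python
from tensorboardX import SummaryWriter

# write a dummy decreasing "loss" curve to ./sanity_logs (arbitrary path)
writer = SummaryWriter("./sanity_logs")
for step in range(10):
    writer.add_scalar("loss", 1.0 / (step + 1), global_step=step)
writer.close()

# inspect afterwards with: tensorboard --logdir ./sanity_logs
```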
diff --git a/docs/source/fastNLP.core.rst b/docs/source/fastNLP.core.rst index 880be59f..13943f72 100644 --- a/docs/source/fastNLP.core.rst +++ b/docs/source/fastNLP.core.rst @@ -1,62 +1,54 @@ -fastNLP.core package -==================== +fastNLP.core +============= -Submodules ----------- - -fastNLP.core.action module --------------------------- +fastNLP.core.action +-------------------- .. automodule:: fastNLP.core.action :members: - :undoc-members: - :show-inheritance: -fastNLP.core.metrics module ---------------------------- +fastNLP.core.loss +------------------ + +.. automodule:: fastNLP.core.loss + :members: + +fastNLP.core.metrics +--------------------- .. automodule:: fastNLP.core.metrics :members: - :undoc-members: - :show-inheritance: -fastNLP.core.optimizer module ------------------------------ +fastNLP.core.optimizer +----------------------- .. automodule:: fastNLP.core.optimizer :members: - :undoc-members: - :show-inheritance: -fastNLP.core.predictor module ------------------------------ +fastNLP.core.predictor +----------------------- .. automodule:: fastNLP.core.predictor :members: - :undoc-members: - :show-inheritance: -fastNLP.core.tester module --------------------------- +fastNLP.core.preprocess +------------------------ + +.. automodule:: fastNLP.core.preprocess + :members: + +fastNLP.core.tester +-------------------- .. automodule:: fastNLP.core.tester :members: - :undoc-members: - :show-inheritance: -fastNLP.core.trainer module ---------------------------- +fastNLP.core.trainer +--------------------- .. automodule:: fastNLP.core.trainer :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- .. automodule:: fastNLP.core :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/fastNLP.loader.rst b/docs/source/fastNLP.loader.rst index 90123b5b..658e07ff 100644 --- a/docs/source/fastNLP.loader.rst +++ b/docs/source/fastNLP.loader.rst @@ -1,62 +1,36 @@ -fastNLP.loader package -====================== +fastNLP.loader +=============== -Submodules ----------- - -fastNLP.loader.base\_loader module ----------------------------------- +fastNLP.loader.base\_loader +---------------------------- .. automodule:: fastNLP.loader.base_loader :members: - :undoc-members: - :show-inheritance: -fastNLP.loader.config\_loader module ------------------------------------- +fastNLP.loader.config\_loader +------------------------------ .. automodule:: fastNLP.loader.config_loader :members: - :undoc-members: - :show-inheritance: -fastNLP.loader.dataset\_loader module -------------------------------------- +fastNLP.loader.dataset\_loader +------------------------------- .. automodule:: fastNLP.loader.dataset_loader :members: - :undoc-members: - :show-inheritance: -fastNLP.loader.embed\_loader module ------------------------------------ +fastNLP.loader.embed\_loader +----------------------------- .. automodule:: fastNLP.loader.embed_loader :members: - :undoc-members: - :show-inheritance: -fastNLP.loader.model\_loader module ------------------------------------ +fastNLP.loader.model\_loader +----------------------------- .. automodule:: fastNLP.loader.model_loader :members: - :undoc-members: - :show-inheritance: - -fastNLP.loader.preprocess module --------------------------------- - -.. automodule:: fastNLP.loader.preprocess - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- .. 
automodule:: fastNLP.loader :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/fastNLP.models.rst b/docs/source/fastNLP.models.rst index 49481ac1..f17b1d49 100644 --- a/docs/source/fastNLP.models.rst +++ b/docs/source/fastNLP.models.rst @@ -1,46 +1,30 @@ -fastNLP.models package -====================== +fastNLP.models +=============== -Submodules ----------- - -fastNLP.models.base\_model module ---------------------------------- +fastNLP.models.base\_model +--------------------------- .. automodule:: fastNLP.models.base_model :members: - :undoc-members: - :show-inheritance: -fastNLP.models.char\_language\_model module -------------------------------------------- +fastNLP.models.char\_language\_model +------------------------------------- .. automodule:: fastNLP.models.char_language_model :members: - :undoc-members: - :show-inheritance: -fastNLP.models.cnn\_text\_classification module ------------------------------------------------ +fastNLP.models.cnn\_text\_classification +----------------------------------------- .. automodule:: fastNLP.models.cnn_text_classification :members: - :undoc-members: - :show-inheritance: -fastNLP.models.sequence\_modeling module ----------------------------------------- +fastNLP.models.sequence\_modeling +---------------------------------- .. automodule:: fastNLP.models.sequence_modeling :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- .. automodule:: fastNLP.models :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/fastNLP.modules.aggregation.rst b/docs/source/fastNLP.modules.aggregation.rst index 7106f7bc..bfaf8646 100644 --- a/docs/source/fastNLP.modules.aggregation.rst +++ b/docs/source/fastNLP.modules.aggregation.rst @@ -1,54 +1,36 @@ -fastNLP.modules.aggregation package -=================================== +fastNLP.modules.aggregation +============================ -Submodules ----------- - -fastNLP.modules.aggregation.attention module --------------------------------------------- +fastNLP.modules.aggregation.attention +-------------------------------------- .. automodule:: fastNLP.modules.aggregation.attention :members: - :undoc-members: - :show-inheritance: -fastNLP.modules.aggregation.avg\_pool module --------------------------------------------- +fastNLP.modules.aggregation.avg\_pool +-------------------------------------- .. automodule:: fastNLP.modules.aggregation.avg_pool :members: - :undoc-members: - :show-inheritance: -fastNLP.modules.aggregation.kmax\_pool module ---------------------------------------------- +fastNLP.modules.aggregation.kmax\_pool +--------------------------------------- .. automodule:: fastNLP.modules.aggregation.kmax_pool :members: - :undoc-members: - :show-inheritance: -fastNLP.modules.aggregation.max\_pool module --------------------------------------------- +fastNLP.modules.aggregation.max\_pool +-------------------------------------- .. automodule:: fastNLP.modules.aggregation.max_pool :members: - :undoc-members: - :show-inheritance: -fastNLP.modules.aggregation.self\_attention module --------------------------------------------------- +fastNLP.modules.aggregation.self\_attention +-------------------------------------------- .. automodule:: fastNLP.modules.aggregation.self_attention :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- .. 
automodule:: fastNLP.modules.aggregation :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/fastNLP.modules.decoder.rst b/docs/source/fastNLP.modules.decoder.rst index 914802da..6844543a 100644 --- a/docs/source/fastNLP.modules.decoder.rst +++ b/docs/source/fastNLP.modules.decoder.rst @@ -1,22 +1,18 @@ -fastNLP.modules.decoder package -=============================== +fastNLP.modules.decoder +======================== -Submodules ----------- - -fastNLP.modules.decoder.CRF module ----------------------------------- +fastNLP.modules.decoder.CRF +---------------------------- .. automodule:: fastNLP.modules.decoder.CRF :members: - :undoc-members: - :show-inheritance: +fastNLP.modules.decoder.MLP +---------------------------- + +.. automodule:: fastNLP.modules.decoder.MLP + :members: -Module contents ---------------- .. automodule:: fastNLP.modules.decoder :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/fastNLP.modules.encoder.rst b/docs/source/fastNLP.modules.encoder.rst index 3af14b64..41b4ce13 100644 --- a/docs/source/fastNLP.modules.encoder.rst +++ b/docs/source/fastNLP.modules.encoder.rst @@ -1,78 +1,54 @@ -fastNLP.modules.encoder package -=============================== +fastNLP.modules.encoder +======================== -Submodules ----------- - -fastNLP.modules.encoder.char\_embedding module ----------------------------------------------- +fastNLP.modules.encoder.char\_embedding +---------------------------------------- .. automodule:: fastNLP.modules.encoder.char_embedding :members: - :undoc-members: - :show-inheritance: -fastNLP.modules.encoder.conv module ------------------------------------ +fastNLP.modules.encoder.conv +----------------------------- .. automodule:: fastNLP.modules.encoder.conv :members: - :undoc-members: - :show-inheritance: -fastNLP.modules.encoder.conv\_maxpool module --------------------------------------------- +fastNLP.modules.encoder.conv\_maxpool +-------------------------------------- .. automodule:: fastNLP.modules.encoder.conv_maxpool :members: - :undoc-members: - :show-inheritance: -fastNLP.modules.encoder.embedding module ----------------------------------------- +fastNLP.modules.encoder.embedding +---------------------------------- .. automodule:: fastNLP.modules.encoder.embedding :members: - :undoc-members: - :show-inheritance: -fastNLP.modules.encoder.linear module -------------------------------------- +fastNLP.modules.encoder.linear +------------------------------- .. automodule:: fastNLP.modules.encoder.linear :members: - :undoc-members: - :show-inheritance: -fastNLP.modules.encoder.lstm module ------------------------------------ +fastNLP.modules.encoder.lstm +----------------------------- .. automodule:: fastNLP.modules.encoder.lstm :members: - :undoc-members: - :show-inheritance: -fastNLP.modules.encoder.masked\_rnn module ------------------------------------------- +fastNLP.modules.encoder.masked\_rnn +------------------------------------ .. automodule:: fastNLP.modules.encoder.masked_rnn :members: - :undoc-members: - :show-inheritance: -fastNLP.modules.encoder.variational\_rnn module ------------------------------------------------ +fastNLP.modules.encoder.variational\_rnn +----------------------------------------- .. automodule:: fastNLP.modules.encoder.variational_rnn :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- .. 
automodule:: fastNLP.modules.encoder :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/fastNLP.modules.interaction.rst b/docs/source/fastNLP.modules.interaction.rst index 32552231..91a34268 100644 --- a/docs/source/fastNLP.modules.interaction.rst +++ b/docs/source/fastNLP.modules.interaction.rst @@ -1,10 +1,5 @@ -fastNLP.modules.interaction package -=================================== - -Module contents ---------------- +fastNLP.modules.interaction +============================ .. automodule:: fastNLP.modules.interaction :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/fastNLP.modules.rst b/docs/source/fastNLP.modules.rst index 5a6cac28..6ccdc21a 100644 --- a/docs/source/fastNLP.modules.rst +++ b/docs/source/fastNLP.modules.rst @@ -1,8 +1,5 @@ -fastNLP.modules package -======================= - -Subpackages ------------ +fastNLP.modules +================ .. toctree:: @@ -11,30 +8,18 @@ Subpackages fastNLP.modules.encoder fastNLP.modules.interaction -Submodules ----------- - -fastNLP.modules.other\_modules module -------------------------------------- +fastNLP.modules.other\_modules +------------------------------- .. automodule:: fastNLP.modules.other_modules :members: - :undoc-members: - :show-inheritance: -fastNLP.modules.utils module ----------------------------- +fastNLP.modules.utils +---------------------- .. automodule:: fastNLP.modules.utils :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- .. automodule:: fastNLP.modules :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/fastNLP.rst b/docs/source/fastNLP.rst index fbc3a922..bb5037ce 100644 --- a/docs/source/fastNLP.rst +++ b/docs/source/fastNLP.rst @@ -1,8 +1,5 @@ -fastNLP package -=============== - -Subpackages ------------ +fastNLP +======== .. toctree:: @@ -12,22 +9,12 @@ Subpackages fastNLP.modules fastNLP.saver -Submodules ----------- - -fastNLP.fastnlp module ----------------------- +fastNLP.fastnlp +---------------- .. automodule:: fastNLP.fastnlp :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- .. automodule:: fastNLP :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/fastNLP.saver.rst b/docs/source/fastNLP.saver.rst index 7699c2e8..daa6fbe8 100644 --- a/docs/source/fastNLP.saver.rst +++ b/docs/source/fastNLP.saver.rst @@ -1,30 +1,18 @@ -fastNLP.saver package -===================== +fastNLP.saver +============== -Submodules ----------- - -fastNLP.saver.logger module ---------------------------- +fastNLP.saver.logger +--------------------- .. automodule:: fastNLP.saver.logger :members: - :undoc-members: - :show-inheritance: -fastNLP.saver.model\_saver module ---------------------------------- +fastNLP.saver.model\_saver +--------------------------- .. automodule:: fastNLP.saver.model_saver :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- .. 
automodule:: fastNLP.saver
    :members:
-    :undoc-members:
-    :show-inheritance:
diff --git a/docs/source/figures/procedures_and_sequence_labeling.png b/docs/source/figures/procedures_and_sequence_labeling.png
new file mode 100644
index 00000000..06adc051
Binary files /dev/null and b/docs/source/figures/procedures_and_sequence_labeling.png differ
diff --git a/docs/source/figures/text_classification.png b/docs/source/figures/text_classification.png
new file mode 100644
index 00000000..5884c64e
Binary files /dev/null and b/docs/source/figures/text_classification.png differ
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 1caf2373..37798321 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -1,16 +1,54 @@
-.. fastNLP documentation master file, created by
-   sphinx-quickstart on Mon Aug 20 17:06:44 2018.
-   You can adapt this file completely to your liking, but it should at least
-   contain the root `toctree` directive.
+fastNLP documentation
+=====================
+fastNLP is still in incubation.
 
-Welcome to fastNLP's documentation!
-===================================
+
+Introduction
+------------
+
+fastNLP is a modular natural language processing system based on PyTorch, built for fast development of NLP tools.
+It decomposes deep-learning-based NLP models into distinct modules.
+These modules fall into four categories: encoder, interaction, aggregation and decoder,
+and each category contains several concrete implementations.
+
+Most current NLP models can be built on top of these modules, which greatly simplifies the process of developing NLP models.
+The architecture of fastNLP is shown in the left part of the figure below:
+
+.. image:: figures/procedures_and_sequence_labeling.png
+
+For the model-constructing step, sequence labeling (right part of the figure above) and text classification (figure below) serve as examples:
+
+.. image:: figures/text_classification.png
+
+* encoder module: encodes the input into an abstract representation; it takes a sequence of words and outputs a sequence of vectors.
+* interaction module: lets the information inside the representation interact; it takes a sequence of vectors and outputs a sequence of vectors.
+* aggregation module: aggregates and reduces information; it takes a sequence of vectors and outputs a single vector.
+* decoder module: decodes the representation into the output, either a single label (text classification) or a sequence of labels (sequence labeling).
+
+The interaction and aggregation modules are not necessarily present in every model; the sequence labeling model above, for example, uses neither.
+
+
+
+
+User's Guide
+------------
+.. toctree::
+   :maxdepth: 2
+
+   user/installation
+   user/quickstart
+
+
+API Reference
+-------------
+
+If you are looking for information on a specific function, class or
+method, this part of the documentation is for you.
 
 .. toctree::
-   :maxdepth: 4
-   :caption: Contents:
+   :maxdepth: 2
 
-   fastNLP
+   fastNLP API
 
 
diff --git a/docs/source/modules.rst b/docs/source/modules.rst
deleted file mode 100644
index e9a92cb7..00000000
--- a/docs/source/modules.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-fastNLP
-=======
-
-.. toctree::
-   :maxdepth: 4
-
-   fastNLP
diff --git a/docs/source/user/installation.rst b/docs/source/user/installation.rst
new file mode 100644
index 00000000..0655041b
--- /dev/null
+++ b/docs/source/user/installation.rst
@@ -0,0 +1,31 @@
+============
+Installation
+============
+
+.. contents::
+   :local:
+
+
+Cloning From GitHub
+~~~~~~~~~~~~~~~~~~~
+
+If you just want to use fastNLP, use:
+
+.. code:: shell
+
+   git clone https://github.com/fastnlp/fastNLP
+   cd fastNLP
+
+
+PyTorch Installation
+~~~~~~~~~~~~~~~~~~~~
+
+Visit the `PyTorch official website <https://pytorch.org/>`_ for installation instructions based
+on your system. In general, you can use:
+
+.. code:: shell
+
+   # using conda
+   conda install pytorch torchvision -c pytorch
+   # or using pip
+   pip3 install torch torchvision
diff --git a/docs/source/user/quickstart.rst b/docs/source/user/quickstart.rst
new file mode 100644
index 00000000..c8340053
--- /dev/null
+++ b/docs/source/user/quickstart.rst
@@ -0,0 +1,84 @@
+==========
+Quickstart
+==========
+
+Example
+-------
+
+Basic Usage
+~~~~~~~~~~~
+
+A typical fastNLP routine is composed of four phases: loading dataset,
+pre-processing data, constructing model and training model.
+
+..
code:: python + + from fastNLP.models.base_model import BaseModel + from fastNLP.modules import encoder + from fastNLP.modules import aggregation + from fastNLP.modules import decoder + + from fastNLP.loader.dataset_loader import ClassDatasetLoader + from fastNLP.loader.preprocess import ClassPreprocess + from fastNLP.core.trainer import ClassificationTrainer + from fastNLP.core.inference import ClassificationInfer + + + class ClassificationModel(BaseModel): + """ + Simple text classification model based on CNN. + """ + + def __init__(self, num_classes, vocab_size): + super(ClassificationModel, self).__init__() + + self.emb = encoder.Embedding(nums=vocab_size, dims=300) + self.enc = encoder.Conv( + in_channels=300, out_channels=100, kernel_size=3) + self.agg = aggregation.MaxPool() + self.dec = decoder.MLP(100, num_classes=num_classes) + + def forward(self, x): + x = self.emb(x) # [N,L] -> [N,L,C] + x = self.enc(x) # [N,L,C_in] -> [N,L,C_out] + x = self.agg(x) # [N,L,C] -> [N,C] + x = self.dec(x) # [N,C] -> [N, N_class] + return x + + + data_dir = 'data' # directory to save data and model + train_path = 'test/data_for_tests/text_classify.txt' # training set file + + # load dataset + ds_loader = ClassDatasetLoader("train", train_path) + data = ds_loader.load() + + # pre-process dataset + pre = ClassPreprocess(data_dir) + vocab_size, n_classes = pre.process(data, "data_train.pkl") + + # construct model + model_args = { + 'num_classes': n_classes, + 'vocab_size': vocab_size + } + model = ClassificationModel(num_classes=n_classes, vocab_size=vocab_size) + + # train model + train_args = { + "epochs": 20, + "batch_size": 50, + "pickle_path": data_dir, + "validate": False, + "save_best_dev": False, + "model_saved_path": None, + "use_cuda": True, + "learn_rate": 1e-3, + "momentum": 0.9} + trainer = ClassificationTrainer(train_args) + trainer.train(model) + + # predict using model + seqs = [x[0] for x in data] + infer = ClassificationInfer(data_dir) + labels_pred = infer.predict(model, seqs) \ No newline at end of file diff --git a/fastNLP/core/action.py b/fastNLP/core/action.py index 358db499..ef595cbb 100644 --- a/fastNLP/core/action.py +++ b/fastNLP/core/action.py @@ -1,7 +1,3 @@ -""" - This file defines Action(s) and sample methods. - -""" from collections import Counter import numpy as np @@ -9,13 +5,12 @@ import torch class Action(object): - """ - Operations shared by Trainer, Tester, or Inference. + """Operations shared by Trainer, Tester, or Inference. + This is designed for reducing replicate codes. - make_batch: produce a min-batch of data. @staticmethod - pad: padding method used in sequence modeling. @staticmethod - mode: change network mode for either train or test. (for PyTorch) @staticmethod - The base Action shall define operations shared by as much task-specific Actions as possible. """ def __init__(self): @@ -24,18 +19,20 @@ class Action(object): @staticmethod def make_batch(iterator, use_cuda, output_length=True, max_len=None): """Batch and Pad data. + :param iterator: an iterator, (object that implements __next__ method) which returns the next sample. :param use_cuda: bool, whether to use GPU :param output_length: bool, whether to output the original length of the sequence before padding. (default: True) :param max_len: int, maximum sequence length. Longer sequences will be clipped. (default: None) - :return - if output_length is True: + :return : + + if output_length is True, (batch_x, seq_len): tuple of two elements batch_x: list. Each entry is a list of features of a sample. 
[batch_size, max_len] seq_len: list. The length of the pre-padded sequence, if output_length is True. batch_y: list. Each entry is a list of labels of a sample. [batch_size, num_labels] - if output_length is False: + if output_length is False, batch_x: list. Each entry is a list of features of a sample. [batch_size, max_len] batch_y: list. Each entry is a list of labels of a sample. [batch_size, num_labels] """ @@ -77,21 +74,21 @@ class Action(object): return batch @staticmethod - def mode(model, test=False): - """ - Train mode or Test mode. This is for PyTorch currently. - :param model: - :param test: + def mode(model, is_test=False): + """Train mode or Test mode. This is for PyTorch currently. + + :param model: a PyTorch model + :param is_test: bool, whether in test mode or not. """ - if test: + if is_test: model.eval() else: model.train() def convert_to_torch_tensor(data_list, use_cuda): - """ - convert lists into (cuda) Tensors. + """Convert lists into (cuda) Tensors. + :param data_list: 2-level lists :param use_cuda: bool, whether to use GPU or not :return data_list: PyTorch Tensor of shape [batch_size, max_seq_len] @@ -103,8 +100,8 @@ def convert_to_torch_tensor(data_list, use_cuda): def k_means_1d(x, k, max_iter=100): - """ - Perform k-means on 1-D data. + """Perform k-means on 1-D data. + :param x: list of int, representing points in 1-D. :param k: the number of clusters required. :param max_iter: maximum iteration @@ -132,21 +129,28 @@ def k_means_1d(x, k, max_iter=100): def k_means_bucketing(all_inst, buckets): - """ + """Assign all instances into possible buckets using k-means, such that instances in the same bucket have similar lengths. + :param all_inst: 3-level list + E.g. :: + [ [[word_11, word_12, word_13], [label_11. label_12]], # sample 1 [[word_21, word_22, word_23], [label_21. label_22]], # sample 2 ... ] + :param buckets: list of int. The length of the list is the number of buckets. Each integer is the maximum length threshold for each bucket (This is usually None.). :return data: 2-level list + :: + [ [index_11, index_12, ...], # bucket 1 [index_21, index_22, ...], # bucket 2 ... ] + """ bucket_data = [[] for _ in buckets] num_buckets = len(buckets) @@ -160,11 +164,16 @@ def k_means_bucketing(all_inst, buckets): class BaseSampler(object): - """ - Base class for all samplers. + """The base class of all samplers. + """ def __init__(self, data_set): + """ + + :param data_set: multi-level list, of shape [num_example, *] + + """ self.data_set_length = len(data_set) self.data = data_set @@ -176,11 +185,16 @@ class BaseSampler(object): class SequentialSampler(BaseSampler): - """ - Sample data in the original order. + """Sample data in the original order. + """ def __init__(self, data_set): + """ + + :param data_set: multi-level list + + """ super(SequentialSampler, self).__init__(data_set) def __iter__(self): @@ -188,11 +202,16 @@ class SequentialSampler(BaseSampler): class RandomSampler(BaseSampler): - """ - Sample data in random permutation order. + """Sample data in random permutation order. + """ def __init__(self, data_set): + """ + + :param data_set: multi-level list + + """ super(RandomSampler, self).__init__(data_set) self.order = np.random.permutation(self.data_set_length) @@ -201,11 +220,18 @@ class RandomSampler(BaseSampler): class Batchifier(object): - """ - Wrap random or sequential sampler to generate a mini-batch. + """Wrap random or sequential sampler to generate a mini-batch. 
+ """ def __init__(self, sampler, batch_size, drop_last=True): + """ + + :param sampler: a Sampler object + :param batch_size: int, the size of the mini-batch + :param drop_last: bool, whether to drop the last examples that are not enough to make a mini-batch. + + """ super(Batchifier, self).__init__() self.sampler = sampler self.batch_size = batch_size @@ -223,8 +249,7 @@ class Batchifier(object): class BucketBatchifier(Batchifier): - """ - Partition all samples into multiple buckets, each of which contains sentences of approximately the same length. + """Partition all samples into multiple buckets, each of which contains sentences of approximately the same length. In sampling, first random choose a bucket. Then sample data from it. The number of buckets is decided dynamically by the variance of sentence lengths. """ @@ -237,6 +262,7 @@ class BucketBatchifier(Batchifier): :param num_buckets: int, number of buckets for grouping these sequences. :param drop_last: bool, useless currently. :param sampler: Sampler, useless currently. + """ super(BucketBatchifier, self).__init__(sampler, batch_size, drop_last) buckets = ([None] * num_buckets) diff --git a/fastNLP/core/loss.py b/fastNLP/core/loss.py index f83b4959..8d866bbf 100644 --- a/fastNLP/core/loss.py +++ b/fastNLP/core/loss.py @@ -8,8 +8,13 @@ class Loss(object): """ def __init__(self, args): + """ + + :param args: None or str, the name of a loss function. + + """ if args is None: - # this is useful when + # this is useful when Trainer.__init__ performs type check self._loss = None elif isinstance(args, str): self._loss = self._borrow_from_pytorch(args) @@ -17,10 +22,19 @@ class Loss(object): raise NotImplementedError def get(self): + """ + + :return self._loss: the loss function + """ return self._loss @staticmethod def _borrow_from_pytorch(loss_name): + """Given a name of a loss function, return it from PyTorch. 
+
+        :param loss_name: str, the name of a loss function
+        :return loss: a PyTorch loss
+        """
         if loss_name == "cross_entropy":
             return torch.nn.CrossEntropyLoss()
         else:
diff --git a/fastNLP/core/metrics.py b/fastNLP/core/metrics.py
index c8d7fe52..7bf4b034 100644
--- a/fastNLP/core/metrics.py
+++ b/fastNLP/core/metrics.py
@@ -1,11 +1,12 @@
 import warnings
+
 import numpy as np
 import torch
 
 
 def _conver_numpy(x):
-    """
-    convert input data to numpy array
+    """Convert input data to a numpy array.
+
     """
     if isinstance(x, np.ndarray):
         return x
@@ -17,21 +18,20 @@
 
 def _check_same_len(*arrays, axis=0):
-    """
-    check if input array list has same length for one dimension
+    """Check whether all input arrays have the same length along one dimension.
+
     """
     lens = set([x.shape[axis] for x in arrays if x is not None])
     return len(lens) == 1
 
 
 def _label_types(y):
-    """
-    determine the type
-    "binary"
-    "multiclass"
-    "multiclass-multioutput"
-    "multilabel"
-    "unknown"
+    """Determine the type
+        - "binary"
+        - "multiclass"
+        - "multiclass-multioutput"
+        - "multilabel"
+        - "unknown"
     """
     # never squeeze the first dimension
     y = y.squeeze() if y.shape[0] > 1 else y.resize(1, -1)
@@ -46,8 +46,8 @@
 
 def _check_data(y_true, y_pred):
-    """
-    check if y_true and y_pred is same type of data e.g both binary or multiclass
+    """Check whether y_true and y_pred are the same type of data, e.g. both binary or both multiclass.
+
     """
     y_true, y_pred = _conver_numpy(y_true), _conver_numpy(y_pred)
     if not _check_same_len(y_true, y_pred):
@@ -174,16 +174,13 @@
 
 def accuracy_topk(y_true, y_prob, k=1):
-    """
-    Compute accuracy of y_true matching top-k probable
+    """Compute accuracy of y_true matching top-k probable
     labels in y_prob.
 
-    Paras:
-        y_ture - ndarray, true label, [n_samples]
-        y_prob - ndarray, label probabilities, [n_samples, n_classes]
-        k - int, k in top-k
-    Returns:
-        accuracy of top-k
+    :param y_true: ndarray, true label, [n_samples]
+    :param y_prob: ndarray, label probabilities, [n_samples, n_classes]
+    :param k: int, k in top-k
+    :return: accuracy of top-k
     """
 
     y_pred_topk = np.argsort(y_prob, axis=-1)[:, -1:-k - 1:-1]
@@ -195,16 +192,14 @@
 
 def pred_topk(y_prob, k=1):
-    """
-    Return top-k predicted labels and corresponding probabilities.
-
-    Args:
-        y_prob - ndarray, size [n_samples, n_classes], probabilities on labels
-        k - int, k of top-k
-    Returns:
-        y_pred_topk - ndarray, size [n_samples, k], predicted top-k labels
-        y_prob_topk - ndarray, size [n_samples, k], probabilities for
-        top-k labels
+    """Return top-k predicted labels and corresponding probabilities.
+
+    :param y_prob: ndarray, size [n_samples, n_classes], probabilities on labels
+    :param k: int, k of top-k
+    :return:
+        y_pred_topk: ndarray, size [n_samples, k], predicted top-k labels
+        y_prob_topk: ndarray, size [n_samples, k], probabilities for top-k labels
     """
 
     y_pred_topk = np.argsort(y_prob, axis=-1)[:, -1:-k - 1:-1]
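For orientation, here is how the two public helpers documented above are meant to be called. A minimal sketch with made-up arrays: `accuracy_topk` returns the fraction of samples whose true label appears among the k most probable classes, and `pred_topk` returns the top-k labels together with their probabilities:

```python
import numpy as np

from fastNLP.core.metrics import accuracy_topk, pred_topk

# three samples, four classes; the probabilities are invented for illustration
y_prob = np.array([[0.1, 0.2, 0.3, 0.4],
                   [0.7, 0.1, 0.1, 0.1],
                   [0.2, 0.5, 0.2, 0.1]])
y_true = np.array([3, 0, 2])

acc = accuracy_topk(y_true, y_prob, k=2)   # scalar accuracy over the batch
labels, probs = pred_topk(y_prob, k=2)     # each of shape [3, 2]
```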
diff --git a/fastNLP/core/optimizer.py b/fastNLP/core/optimizer.py
index e106fde0..ff2ee40e 100644
--- a/fastNLP/core/optimizer.py
+++ b/fastNLP/core/optimizer.py
@@ -4,7 +4,6 @@ import torch
 
 class Optimizer(object):
     """Wrapper of optimizer from framework
-
     names: arguments (type)
     1. Adam: lr (float), weight_decay (float)
     2. AdaGrad
     3. RMSProp
@@ -16,20 +15,29 @@ class Optimizer(object):
         """
         :param optimizer_name: str, the name of the optimizer
         :param kwargs: the arguments
+
         """
         self.optim_name = optimizer_name
         self.kwargs = kwargs
 
     @property
     def name(self):
+        """The name of the optimizer.
+
+        :return: str
+        """
         return self.optim_name
 
     @property
     def params(self):
+        """The arguments used to create the optimizer.
+
+        :return: dict of (str, *)
+        """
         return self.kwargs
 
     def construct_from_pytorch(self, model_params):
-        """construct a optimizer from framework over given model parameters"""
+        """Construct an optimizer from framework over given model parameters."""
 
         if self.optim_name in ["SGD", "sgd"]:
             if "lr" in self.kwargs:
diff --git a/fastNLP/core/predictor.py b/fastNLP/core/predictor.py
index 03a6e43c..d04a6ef0 100644
--- a/fastNLP/core/predictor.py
+++ b/fastNLP/core/predictor.py
@@ -70,7 +70,7 @@ class Predictor(object):
     def predict(self, network, data):
         """Perform inference using the trained model.
 
-        :param network: a PyTorch model
+        :param network: a PyTorch model (cpu)
         :param data: list of list of strings
         :return: list of list of strings, [num_examples, tag_seq_length]
         """
diff --git a/fastNLP/core/preprocess.py b/fastNLP/core/preprocess.py
index f950929e..f8142c36 100644
--- a/fastNLP/core/preprocess.py
+++ b/fastNLP/core/preprocess.py
@@ -17,12 +17,24 @@ DEFAULT_WORD_TO_INDEX = {DEFAULT_PADDING_LABEL: 0, DEFAULT_UNKNOWN_LABEL: 1,
 # the first vocab in dict with the index = 5
 
 def save_pickle(obj, pickle_path, file_name):
+    """Save an object into a pickle file.
+
+    :param obj: an object
+    :param pickle_path: str, the directory where the pickle file is to be saved
+    :param file_name: str, the name of the pickle file. In general, it should end with ".pkl".
+    """
     with open(os.path.join(pickle_path, file_name), "wb") as f:
         _pickle.dump(obj, f)
     print("{} saved in {}".format(file_name, pickle_path))
 
 
 def load_pickle(pickle_path, file_name):
+    """Load an object from a given pickle file.
+
+    :param pickle_path: str, the directory where the pickle file is.
+    :param file_name: str, the name of the pickle file.
+    :return obj: an object stored in the pickle
+    """
     with open(os.path.join(pickle_path, file_name), "rb") as f:
         obj = _pickle.load(f)
     print("{} loaded from {}".format(file_name, pickle_path))
@@ -30,7 +42,8 @@ def load_pickle(pickle_path, file_name):
 
 def pickle_exist(pickle_path, pickle_name):
-    """
+    """Check if a given pickle file exists in the directory.
+
     :param pickle_path: the directory of target pickle file
     :param pickle_name: the filename of target pickle file
     :return: True if file exists else False
@@ -45,6 +58,19 @@
 
 class BasePreprocess(object):
+    """Base class of all preprocessors.
+    Preprocessors are responsible for converting data of strings into data of indices.
+    During the pre-processing, the following pickle files will be built:
+
+    - "word2id.pkl", a mapping from words (tokens) to indices
+    - "id2word.pkl", a reversed dictionary
+    - "label2id.pkl", a dictionary on labels
+    - "id2label.pkl", a reversed dictionary on labels
+
+    These four pickle files are expected to be saved in the given pickle directory once they are constructed.
+    Preprocessors will check if those files are already in the directory and will reuse them in future calls.
+ """ + def __init__(self): self.word2index = None self.label2index = None @@ -59,6 +85,7 @@ class BasePreprocess(object): def run(self, train_dev_data, test_data=None, pickle_path="./", train_dev_split=0, cross_val=False, n_fold=10): """Main preprocessing pipeline. + :param train_dev_data: three-level list, with either single label or multiple labels in a sample. :param test_data: three-level list, with either single label or multiple labels in a sample. (optional) :param pickle_path: str, the path to save the pickle files. @@ -67,6 +94,7 @@ class BasePreprocess(object): :param n_fold: int, the number of folds of cross validation. Only useful when cross_val is True. :return results: a tuple of datasets after preprocessing. """ + if pickle_exist(pickle_path, "word2id.pkl") and pickle_exist(pickle_path, "class2id.pkl"): self.word2index = load_pickle(pickle_path, "word2id.pkl") self.label2index = load_pickle(pickle_path, "class2id.pkl") @@ -182,25 +210,31 @@ class SeqLabelPreprocess(BasePreprocess): """Preprocess pipeline, including building mapping from words to index, from index to words, from labels/classes to index, from index to labels/classes. data of three-level list which have multiple labels in each sample. + :: + [ [ [word_11, word_12, ...], [label_1, label_1, ...] ], [ [word_21, word_22, ...], [label_2, label_1, ...] ], ... ] + """ def __init__(self): super(SeqLabelPreprocess, self).__init__() def build_dict(self, data): - """ - Add new words with indices into self.word_dict, new labels with indices into self.label_dict. + """Add new words with indices into self.word_dict, new labels with indices into self.label_dict. + :param data: three-level list + :: + [ [ [word_11, word_12, ...], [label_1, label_1, ...] ], [ [word_21, word_22, ...], [label_2, label_1, ...] ], ... ] + :return word2index: dict of {str, int} label2index: dict of {str, int} """ @@ -216,14 +250,17 @@ class SeqLabelPreprocess(BasePreprocess): return word2index, label2index def to_index(self, data): - """ - Convert word strings and label strings into indices. + """Convert word strings and label strings into indices. + :param data: three-level list + :: + [ [ [word_11, word_12, ...], [label_1, label_1, ...] ], [ [word_21, word_22, ...], [label_2, label_1, ...] ], ... ] + :return data_index: the same shape as data, but each string is replaced by its corresponding index """ data_index = [] @@ -242,11 +279,14 @@ class ClassPreprocess(BasePreprocess): Preprocess pipeline, including building mapping from words to index, from index to words, from labels/classes to index, from index to labels/classes. design for data of three-level list which has a single label in each sample. + :: + [ [ [word_11, word_12, ...], label_1 ], [ [word_21, word_22, ...], label_2 ], ... ] + """ def __init__(self): @@ -269,18 +309,21 @@ class ClassPreprocess(BasePreprocess): for word in sent: if word not in word2index: - word2index[word[0]] = len(word2index) + word2index[word] = len(word2index) return word2index, label2index def to_index(self, data): - """ - Convert word strings and label strings into indices. + """Convert word strings and label strings into indices. + :param data: three-level list + :: + [ [ [word_11, word_12, ...], label_1 ], [ [word_21, word_22, ...], label_2 ], ... ] + :return data_index: the same shape as data, but each string is replaced by its corresponding index """ data_index = [] @@ -295,14 +338,15 @@ class ClassPreprocess(BasePreprocess): def infer_preprocess(pickle_path, data): - """ - Preprocess over inference data. 
- Transform three-level list of strings into that of index. + """Preprocess over inference data. Transform three-level list of strings into that of index. + :: + [ [word_11, word_12, ...], [word_21, word_22, ...], ... ] + """ word2index = load_pickle(pickle_path, "word2id.pkl") data_index = [] diff --git a/fastNLP/core/tester.py b/fastNLP/core/tester.py index c168822e..bcb6ba8c 100644 --- a/fastNLP/core/tester.py +++ b/fastNLP/core/tester.py @@ -38,7 +38,7 @@ class BaseTester(object): Obviously, "required_args" is the subset of "default_args". The value in "default_args" to the keys in "required_args" is simply for type check. """ - # TODO: required arguments + # add required arguments here required_args = {} for req_key in required_args: @@ -56,7 +56,7 @@ class BaseTester(object): logger.error(msg) raise ValueError(msg) else: - # BeseTester doesn't care about extra arguments + # BaseTester doesn't care about extra arguments pass print(default_args) @@ -69,8 +69,8 @@ class BaseTester(object): self.print_every_step = default_args["print_every_step"] self._model = None - self.eval_history = [] - self.batch_output = [] + self.eval_history = [] # evaluation results of all batches + self.batch_output = [] # outputs of all batches def test(self, network, dev_data): if torch.cuda.is_available() and self.use_cuda: @@ -83,10 +83,10 @@ class BaseTester(object): self.eval_history.clear() self.batch_output.clear() - iterator = iter(Batchifier(RandomSampler(dev_data), self.batch_size, drop_last=True)) + iterator = iter(Batchifier(RandomSampler(dev_data), self.batch_size, drop_last=False)) step = 0 - for batch_x, batch_y in self.make_batch(iterator, dev_data): + for batch_x, batch_y in self.make_batch(iterator): with torch.no_grad(): prediction = self.data_forward(network, batch_x) eval_results = self.evaluate(prediction, batch_y) @@ -99,7 +99,7 @@ class BaseTester(object): print_output = "[test step {}] {}".format(step, eval_results) logger.info(print_output) if self.print_every_step > 0 and step % self.print_every_step == 0: - print(print_output) + print(self.make_eval_output(prediction, eval_results)) step += 1 def mode(self, model, test): @@ -115,28 +115,48 @@ class BaseTester(object): raise NotImplementedError def evaluate(self, predict, truth): - """Compute evaluation metrics for the model. """ + """Compute evaluation metrics. + + :param predict: Tensor + :param truth: Tensor + :return eval_results: can be anything. It will be stored in self.eval_history + """ raise NotImplementedError @property def metrics(self): - """Return a list of metrics. """ + """Compute and return metrics. + Use self.eval_history to compute metrics over the whole dev set. + Please refer to metrics.py for common metric functions. + + :return : variable number of outputs + """ raise NotImplementedError - def show_matrices(self): - """This is called by Trainer to print evaluation results on dev set during training. + def show_metrics(self): + """Customize evaluation outputs in Trainer. + Called by Trainer to print evaluation results on dev set during training. + Use self.metrics to fetch available metrics. :return print_str: str """ raise NotImplementedError - def make_batch(self, iterator, data): + def make_batch(self, iterator): raise NotImplementedError + def make_eval_output(self, predictions, eval_results): + """Customize Tester outputs. + + :param predictions: Tensor + :param eval_results: Tensor + :return: str, to be printed. 
+ """ + raise NotImplementedError class SeqLabelTester(BaseTester): - """ - Tester for sequence labeling. + """Tester for sequence labeling. + """ def __init__(self, **test_args): @@ -194,15 +214,15 @@ class SeqLabelTester(BaseTester): batch_accuracy = np.mean([x[1] for x in self.eval_history]) return batch_loss, batch_accuracy - def show_matrices(self): - """ - This is called by Trainer to print evaluation on dev set. + def show_metrics(self): + """This is called by Trainer to print evaluation on dev set. + :return print_str: str """ loss, accuracy = self.metrics() return "dev loss={:.2f}, accuracy={:.2f}".format(loss, accuracy) - def make_batch(self, iterator, data): + def make_batch(self, iterator): return Action.make_batch(iterator, use_cuda=self.use_cuda, output_length=True) @@ -211,12 +231,12 @@ class ClassificationTester(BaseTester): def __init__(self, **test_args): """ - :param test_args: a dict-like object that has __getitem__ method, \ + :param test_args: a dict-like object that has __getitem__ method. can be accessed by "test_args["key_str"]" """ super(ClassificationTester, self).__init__(**test_args) - def make_batch(self, iterator, data, max_len=None): + def make_batch(self, iterator, max_len=None): return Action.make_batch(iterator, use_cuda=self.use_cuda, max_len=max_len) def data_forward(self, network, x): diff --git a/fastNLP/core/trainer.py b/fastNLP/core/trainer.py index 7fc34da0..523a1763 100644 --- a/fastNLP/core/trainer.py +++ b/fastNLP/core/trainer.py @@ -1,10 +1,11 @@ -import _pickle import copy import os import time from datetime import timedelta import torch +import tensorboardX +from tensorboardX import SummaryWriter from fastNLP.core.action import Action from fastNLP.core.action import RandomSampler, Batchifier @@ -15,16 +16,12 @@ from fastNLP.modules import utils from fastNLP.saver.logger import create_logger from fastNLP.saver.model_saver import ModelSaver -DEFAULT_QUEUE_SIZE = 300 logger = create_logger(__name__, "./train_test.log") class BaseTrainer(object): - """Operations to train a model, including data loading, SGD, and validation. + """Operations of training a model, including data loading, gradient descent, and validation. - Subclasses must implement the following abstract methods: - - grad_backward - - get_loss """ def __init__(self, **kwargs): @@ -32,10 +29,10 @@ class BaseTrainer(object): :param kwargs: dict of (key, value), or dict-like object. key is str. The base trainer requires the following keys: - - epochs: int, the number of epochs in training - - validate: bool, whether or not to validate on dev set - - batch_size: int - - pickle_path: str, the path to pickle files for pre-processing + - epochs: int, the number of epochs in training + - validate: bool, whether or not to validate on dev set + - batch_size: int + - pickle_path: str, the path to pickle files for pre-processing """ super(BaseTrainer, self).__init__() @@ -47,7 +44,7 @@ class BaseTrainer(object): """ default_args = {"epochs": 3, "batch_size": 8, "validate": True, "use_cuda": True, "pickle_path": "./save/", "save_best_dev": True, "model_name": "default_model_name.pkl", "print_every_step": 1, - "loss": Loss(None), + "loss": Loss(None), # used to pass type check "optimizer": Optimizer("Adam", lr=0.001, weight_decay=0) } """ @@ -56,7 +53,7 @@ class BaseTrainer(object): Obviously, "required_args" is the subset of "default_args". The value in "default_args" to the keys in "required_args" is simply for type check. 
""" - # TODO: required arguments + # add required arguments here required_args = {} for req_key in required_args: @@ -91,9 +88,12 @@ class BaseTrainer(object): self._loss_func = default_args["loss"].get() # return a pytorch loss function or None self._optimizer = None self._optimizer_proto = default_args["optimizer"] + self._summary_writer = SummaryWriter(self.pickle_path + 'tensorboard_logs') + self._graph_summaried = False def train(self, network, train_data, dev_data=None): """General Training Procedure + :param network: a model :param train_data: three-level list, the training set. :param dev_data: three-level list, the validation data (optional) @@ -144,12 +144,13 @@ class BaseTrainer(object): print("Saved better model selected by validation.") logger.info("Saved better model selected by validation.") - valid_results = validator.show_matrices() + valid_results = validator.show_metrics() print("[epoch {}] {}".format(epoch, valid_results)) logger.info("[epoch {}] {}".format(epoch, valid_results)) def _train_step(self, data_iterator, network, **kwargs): """Training process in one epoch. + kwargs should contain: - n_print: int, print training information every n steps. - start: time.time(), the starting time of this step. @@ -163,6 +164,11 @@ class BaseTrainer(object): loss = self.get_loss(prediction, batch_y) self.grad_backward(loss) self.update() + self._summary_writer.add_scalar("loss", loss.item(), global_step=step) + + if not self._graph_summaried: + self._summary_writer.add_graph(network, batch_x) + self._graph_summaried = True if kwargs["n_print"] > 0 and step % kwargs["n_print"] == 0: end = time.time() @@ -198,21 +204,6 @@ class BaseTrainer(object): network_copy = copy.deepcopy(network) self.train(network_copy, train_data_cv[i], dev_data_cv[i]) - def load_train_data(self, pickle_path): - """ - For task-specific processing. - :param pickle_path: - :return data_train - """ - file_path = os.path.join(pickle_path, "data_train.pkl") - if os.path.exists(file_path): - with open(file_path, 'rb') as f: - data = _pickle.load(f) - else: - logger.error("cannot find training data {}. invalid input path for training data.".format(file_path)) - raise RuntimeError("cannot find training data {}".format(file_path)) - return data - def make_batch(self, iterator): raise NotImplementedError @@ -220,14 +211,13 @@ class BaseTrainer(object): Action.mode(network, test) def define_optimizer(self): - """ - Define framework-specific optimizer specified by the models. + """Define framework-specific optimizer specified by the models. + """ self._optimizer = self._optimizer_proto.construct_from_pytorch(self._model.parameters()) def update(self): - """ - Perform weight update on a model. + """Perform weight update on a model. For PyTorch, just call optimizer to update. """ @@ -237,8 +227,8 @@ class BaseTrainer(object): raise NotImplementedError def grad_backward(self, loss): - """ - Compute gradient with link rules. + """Compute gradient with link rules. + :param loss: a scalar where back-prop starts For PyTorch, just do "loss.backward()" @@ -247,8 +237,8 @@ class BaseTrainer(object): loss.backward() def get_loss(self, predict, truth): - """ - Compute loss given prediction and ground truth. + """Compute loss given prediction and ground truth. + :param predict: prediction label vector :param truth: ground truth label vector :return: a scalar @@ -256,8 +246,9 @@ class BaseTrainer(object): return self._loss_func(predict, truth) def define_loss(self): - """ - if the model defines a loss, use model's loss. 
+ """Define a loss for the trainer. + + If the model defines a loss, use model's loss. Otherwise, Trainer must has a loss argument, use it as loss. These two losses cannot be defined at the same time. Trainer does not handle loss definition or choose default losses. @@ -274,7 +265,8 @@ class BaseTrainer(object): logger.info("The model didn't define loss, use Trainer's loss.") def best_eval_result(self, validator): - """ + """Check if the current epoch yields better validation results. + :param validator: a Tester instance :return: bool, True means current results on dev set is the best. """ @@ -289,15 +281,14 @@ class BaseTrainer(object): """ if model_name[-4:] != ".pkl": model_name += ".pkl" - ModelSaver(self.pickle_path + model_name).save_pytorch(network) + ModelSaver(os.path.join(self.pickle_path, model_name)).save_pytorch(network) def _create_validator(self, valid_args): raise NotImplementedError class SeqLabelTrainer(BaseTrainer): - """ - Trainer for Sequence Labeling + """Trainer for Sequence Labeling """ @@ -327,11 +318,11 @@ class SeqLabelTrainer(BaseTrainer): return y def get_loss(self, predict, truth): - """ - Compute loss given prediction and ground truth. + """Compute loss given prediction and ground truth. + :param predict: prediction label vector, [batch_size, max_len, tag_size] :param truth: ground truth label vector, [batch_size, max_len] - :return: a scalar + :return loss: a scalar """ batch_size, max_len = predict.size(0), predict.size(1) assert truth.shape == (batch_size, max_len) diff --git a/fastNLP/fastnlp.py b/fastNLP/fastnlp.py index 67204161..c76e6681 100644 --- a/fastNLP/fastnlp.py +++ b/fastNLP/fastnlp.py @@ -1,3 +1,5 @@ +import os + from fastNLP.core.predictor import SeqLabelInfer, ClassificationInfer from fastNLP.core.preprocess import load_pickle from fastNLP.loader.config_loader import ConfigLoader, ConfigSection @@ -31,6 +33,22 @@ FastNLP_MODEL_COLLECTION = { "type": "seq_label", "config_file_name": "config", "config_section_name": "text_class_model" + }, + "pos_tag_model": { + "url": "", + "class": "sequence_modeling.AdvSeqLabel", + "pickle": "pos_tag_model_v_0.pkl", + "type": "seq_label", + "config_file_name": "pos_tag.config", + "config_section_name": "pos_tag_model" + }, + "text_classify_model": { + "url": "", + "class": "cnn_text_classification.CNNText", + "pickle": "text_class_model_v0.pkl", + "type": "text_class", + "config_file_name": "text_classify.cfg", + "config_section_name": "model" } } @@ -77,7 +95,7 @@ class FastNLP(object): print("Restore model class {}".format(str(model_class))) model_args = ConfigSection() - ConfigLoader.load_config(self.model_dir + config_file, {section_name: model_args}) + ConfigLoader.load_config(os.path.join(self.model_dir, config_file), {section_name: model_args}) print("Restore model hyper-parameters {}".format(str(model_args.data))) # fetch dictionary size and number of labels from pickle files @@ -91,7 +109,7 @@ class FastNLP(object): print("Model constructed.") # To do: framework independent - ModelLoader.load_pytorch(model, self.model_dir + FastNLP_MODEL_COLLECTION[model_name]["pickle"]) + ModelLoader.load_pytorch(model, os.path.join(self.model_dir, FastNLP_MODEL_COLLECTION[model_name]["pickle"])) print("Model weights loaded.") self.model = model @@ -259,3 +277,38 @@ def interpret_word_seg_results(char_seq, label_seq): else: raise ValueError("invalid label {}".format(label[0])) return words + + +def interpret_cws_pos_results(char_seq, label_seq): + """Transform model output into user-friendly contents. 
+ + :param char_seq: list of string + :param label_seq: list of string, the same length as char_seq. + :return outputs: list of tuple (words, pos_tag): + """ + + def pos_tag_check(seq): + """check whether all entries are the same """ + return len(set(seq)) <= 1 + + word = [] + word_pos = [] + outputs = [] + for char, label in zip(char_seq, label_seq): + tmp = label.split("-") + cws_label, pos_tag = tmp[0], tmp[1] + + if cws_label == "B" or cws_label == "M": + word.append(char) + word_pos.append(pos_tag) + elif cws_label == "E": + word.append(char) + word_pos.append(pos_tag) + if not pos_tag_check(word_pos): + raise RuntimeError("character-wise pos tags inconsistent. ") + outputs.append(("".join(word), word_pos[0])) + word.clear() + word_pos.clear() + elif cws_label == "S": + outputs.append((char, pos_tag)) + return outputs diff --git a/fastNLP/loader/base_loader.py b/fastNLP/loader/base_loader.py index 45a379c1..808567fb 100644 --- a/fastNLP/loader/base_loader.py +++ b/fastNLP/loader/base_loader.py @@ -1,9 +1,8 @@ class BaseLoader(object): """docstring for BaseLoader""" - def __init__(self, data_name, data_path): + def __init__(self, data_path): super(BaseLoader, self).__init__() - self.data_name = data_name self.data_path = data_path def load(self): @@ -25,8 +24,8 @@ class ToyLoader0(BaseLoader): For charLM """ - def __init__(self, name, path): - super(ToyLoader0, self).__init__(name, path) + def __init__(self, data_path): + super(ToyLoader0, self).__init__(data_path) def load(self): with open(self.data_path, 'r') as f: diff --git a/fastNLP/loader/config_loader.py b/fastNLP/loader/config_loader.py index 9e3ebc1c..20d791c4 100644 --- a/fastNLP/loader/config_loader.py +++ b/fastNLP/loader/config_loader.py @@ -9,7 +9,7 @@ class ConfigLoader(BaseLoader): """loader for configuration files""" def __int__(self, data_name, data_path): - super(ConfigLoader, self).__init__(data_name, data_path) + super(ConfigLoader, self).__init__(data_path) self.config = self.parse(super(ConfigLoader, self).load()) @staticmethod @@ -100,7 +100,7 @@ class ConfigSection(object): if __name__ == "__main__": - config = ConfigLoader('configLoader', 'there is no data') + config = ConfigLoader('there is no data') section = {'General': ConfigSection(), 'My': ConfigSection(), 'A': ConfigSection()} """ diff --git a/fastNLP/loader/dataset_loader.py b/fastNLP/loader/dataset_loader.py index 13a96030..2f03bd8a 100644 --- a/fastNLP/loader/dataset_loader.py +++ b/fastNLP/loader/dataset_loader.py @@ -6,8 +6,8 @@ from fastNLP.loader.base_loader import BaseLoader class DatasetLoader(BaseLoader): """"loader for data sets""" - def __init__(self, data_name, data_path): - super(DatasetLoader, self).__init__(data_name, data_path) + def __init__(self, data_path): + super(DatasetLoader, self).__init__(data_path) class POSDatasetLoader(DatasetLoader): @@ -31,8 +31,8 @@ class POSDatasetLoader(DatasetLoader): to label5. 
""" - def __init__(self, data_name, data_path): - super(POSDatasetLoader, self).__init__(data_name, data_path) + def __init__(self, data_path): + super(POSDatasetLoader, self).__init__(data_path) def load(self): assert os.path.exists(self.data_path) @@ -84,8 +84,8 @@ class TokenizeDatasetLoader(DatasetLoader): Data set loader for tokenization data sets """ - def __init__(self, data_name, data_path): - super(TokenizeDatasetLoader, self).__init__(data_name, data_path) + def __init__(self, data_path): + super(TokenizeDatasetLoader, self).__init__(data_path) def load_pku(self, max_seq_len=32): """ @@ -138,8 +138,8 @@ class TokenizeDatasetLoader(DatasetLoader): class ClassDatasetLoader(DatasetLoader): """Loader for classification data sets""" - def __init__(self, data_name, data_path): - super(ClassDatasetLoader, self).__init__(data_name, data_path) + def __init__(self, data_path): + super(ClassDatasetLoader, self).__init__(data_path) def load(self): assert os.path.exists(self.data_path) @@ -177,7 +177,7 @@ class ConllLoader(DatasetLoader): :param str data_name: the name of the conll data set :param str data_path: the path to the conll data set """ - super(ConllLoader, self).__init__(data_name, data_path) + super(ConllLoader, self).__init__(data_path) self.data_set = self.parse(self.load()) def load(self): @@ -209,8 +209,8 @@ class ConllLoader(DatasetLoader): class LMDatasetLoader(DatasetLoader): - def __init__(self, data_name, data_path): - super(LMDatasetLoader, self).__init__(data_name, data_path) + def __init__(self, data_path): + super(LMDatasetLoader, self).__init__(data_path) def load(self): if not os.path.exists(self.data_path): @@ -220,13 +220,57 @@ class LMDatasetLoader(DatasetLoader): return text.strip().split() -if __name__ == "__main__": +class PeopleDailyCorpusLoader(DatasetLoader): """ - data = POSDatasetLoader("xxx", "../../test/data_for_tests/people.txt").load_lines() - for example in data: - for w, l in zip(example[0], example[1]): - print(w, l) + People Daily Corpus: Chinese word segmentation, POS tag, NER """ - ans = TokenizeDatasetLoader("xxx", "/home/zyfeng/Desktop/data/icwb2-data/training/test").load_pku() - print(ans) + def __init__(self, data_path): + super(PeopleDailyCorpusLoader, self).__init__(data_path) + + def load(self): + with open(self.data_path, "r", encoding="utf-8") as f: + sents = f.readlines() + + pos_tag_examples = [] + ner_examples = [] + for sent in sents: + inside_ne = False + sent_pos_tag = [] + sent_words = [] + sent_ner = [] + words = sent.strip().split()[1:] + for word in words: + if "[" in word and "]" in word: + ner_tag = "U" + print(word) + elif "[" in word: + inside_ne = True + ner_tag = "B" + word = word[1:] + elif "]" in word: + ner_tag = "L" + word = word[:word.index("]")] + if inside_ne is True: + inside_ne = False + else: + raise RuntimeError("only ] appears!") + else: + if inside_ne is True: + ner_tag = "I" + else: + ner_tag = "O" + tmp = word.split("/") + token, pos = tmp[0], tmp[1] + sent_ner.append(ner_tag) + sent_pos_tag.append(pos) + sent_words.append(token) + pos_tag_examples.append([sent_words, sent_pos_tag]) + ner_examples.append([sent_words, sent_ner]) + return pos_tag_examples, ner_examples + +if __name__ == "__main__": + loader = PeopleDailyCorpusLoader("./") + pos, ner = loader.load() + print(pos[:10]) + print(ner[:10]) diff --git a/fastNLP/loader/embed_loader.py b/fastNLP/loader/embed_loader.py index 4b70dd0b..a84f6335 100644 --- a/fastNLP/loader/embed_loader.py +++ b/fastNLP/loader/embed_loader.py @@ -1,8 +1,50 @@ +import 
diff --git a/fastNLP/loader/embed_loader.py b/fastNLP/loader/embed_loader.py
index 4b70dd0b..a84f6335 100644
--- a/fastNLP/loader/embed_loader.py
+++ b/fastNLP/loader/embed_loader.py
@@ -1,8 +1,50 @@
+import _pickle
+import os
+
+import numpy as np
+
 from fastNLP.loader.base_loader import BaseLoader
 
 
 class EmbedLoader(BaseLoader):
     """docstring for EmbedLoader"""
 
-    def __init__(self, data_name, data_path):
-        super(EmbedLoader, self).__init__(data_name, data_path)
+    def __init__(self, data_path):
+        super(EmbedLoader, self).__init__(data_path)
+
+    @staticmethod
+    def load_embedding(emb_dim, emb_file, word_dict, emb_pkl):
+        """Load the pre-trained embedding and combine it with the given dictionary.
+
+        :param emb_file: str, the pre-trained embedding.
+            The embedding file should have the following format:
+            Each line is a word embedding, where a word string is followed by multiple floats.
+            Floats are separated by space. The word and the first float are separated by space.
+        :param word_dict: dict, a mapping from word to index.
+        :param emb_dim: int, the dimension of the embedding. Should be the same as the pre-trained embedding.
+        :param emb_pkl: str, the embedding pickle file.
+        :return embedding_np: numpy array of shape (len(word_dict), emb_dim)
+
+        TODO: fragile code
+        """
+        # If the embedding pickle exists, load it and return.
+        if os.path.exists(emb_pkl):
+            with open(emb_pkl, "rb") as f:
+                embedding_np = _pickle.load(f)
+            return embedding_np
+        # Otherwise, load the pre-trained embedding.
+        with open(emb_file, "r", encoding="utf-8") as f:
+            # begin with a random embedding
+            embedding_np = np.random.uniform(-1, 1, size=(len(word_dict), emb_dim))
+            for line in f:
+                line = line.strip().split()
+                if len(line) != emb_dim + 1:
+                    # skip this line if the embedding dimension does not match
+                    continue
+                if line[0] in word_dict:
+                    # find the word and replace its embedding with a pre-trained one
+                    embedding_np[word_dict[line[0]]] = [float(i) for i in line[1:]]
+        # save and return the result
+        with open(emb_pkl, "wb") as f:
+            _pickle.dump(embedding_np, f)
+        return embedding_np
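`load_embedding` caches the combined matrix in `emb_pkl`, so repeated runs skip the text parse entirely. A small usage sketch; the file names and the toy three-word vocabulary are made up:

```python
from fastNLP.loader.embed_loader import EmbedLoader

# A made-up 3-dimensional embedding file with two entries.
with open("toy_emb.txt", "w", encoding="utf-8") as f:
    f.write("the 0.1 0.2 0.3\n")
    f.write("cat 0.4 0.5 0.6\n")

word_dict = {"the": 0, "cat": 1, "dog": 2}
emb = EmbedLoader.load_embedding(emb_dim=3, emb_file="toy_emb.txt",
                                 word_dict=word_dict, emb_pkl="toy_emb.pkl")
print(emb.shape)  # (3, 3); "dog" keeps its random initialization
```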
""" - def __init__(self, data_name, data_path): - super(ModelLoader, self).__init__(data_name, data_path) + def __init__(self, data_path): + super(ModelLoader, self).__init__(data_path) @staticmethod def load_pytorch(empty_model, model_path): diff --git a/fastNLP/models/cnn_text_classification.py b/fastNLP/models/cnn_text_classification.py index b6dcafb3..fc7388a5 100644 --- a/fastNLP/models/cnn_text_classification.py +++ b/fastNLP/models/cnn_text_classification.py @@ -5,7 +5,7 @@ import torch import torch.nn as nn # import torch.nn.functional as F -from fastNLP.modules.encoder.conv_maxpool import ConvMaxpool +import fastNLP.modules.encoder as encoder class CNNText(torch.nn.Module): @@ -18,22 +18,22 @@ class CNNText(torch.nn.Module): def __init__(self, args): super(CNNText, self).__init__() - class_num = args["num_classes"] + num_classes = args["num_classes"] kernel_nums = [100, 100, 100] kernel_sizes = [3, 4, 5] - embed_num = args["vocab_size"] + vocab_size = args["vocab_size"] embed_dim = 300 pretrained_embed = None drop_prob = 0.5 # no support for pre-trained embedding currently - self.embed = nn.Embedding(embed_num, embed_dim, padding_idx=0) - self.conv_pool = ConvMaxpool( + self.embed = encoder.embedding.Embedding(vocab_size, embed_dim) + self.conv_pool = encoder.conv_maxpool.ConvMaxpool( in_channels=embed_dim, out_channels=kernel_nums, kernel_sizes=kernel_sizes) self.dropout = nn.Dropout(drop_prob) - self.fc = nn.Linear(sum(kernel_nums), class_num) + self.fc = encoder.linear.Linear(sum(kernel_nums), num_classes) def forward(self, x): x = self.embed(x) # [N,L] -> [N,L,C] diff --git a/fastNLP/modules/decoder/__init__.py b/fastNLP/modules/decoder/__init__.py index 6c0e5141..7b8b2814 100644 --- a/fastNLP/modules/decoder/__init__.py +++ b/fastNLP/modules/decoder/__init__.py @@ -1,3 +1,4 @@ from .CRF import ConditionalRandomField +from .MLP import MLP -__all__ = ["ConditionalRandomField"] +__all__ = ["ConditionalRandomField", "MLP"] diff --git a/fastNLP/modules/encoder/__init__.py b/fastNLP/modules/encoder/__init__.py index b4e689a7..71b786b9 100644 --- a/fastNLP/modules/encoder/__init__.py +++ b/fastNLP/modules/encoder/__init__.py @@ -2,8 +2,10 @@ from .embedding import Embedding from .linear import Linear from .lstm import Lstm from .conv import Conv +from .conv_maxpool import ConvMaxpool __all__ = ["Lstm", "Embedding", "Linear", - "Conv"] + "Conv", + "ConvMaxpool"] diff --git a/fastNLP/modules/encoder/conv_maxpool.py b/fastNLP/modules/encoder/conv_maxpool.py index 0012dce7..f666e7f9 100644 --- a/fastNLP/modules/encoder/conv_maxpool.py +++ b/fastNLP/modules/encoder/conv_maxpool.py @@ -4,6 +4,7 @@ import torch import torch.nn as nn import torch.nn.functional as F +from torch.nn.init import xavier_uniform_ class ConvMaxpool(nn.Module): @@ -21,6 +22,7 @@ class ConvMaxpool(nn.Module): if isinstance(kernel_sizes, int): out_channels = [out_channels] kernel_sizes = [kernel_sizes] + self.convs = nn.ModuleList([nn.Conv1d( in_channels=in_channels, out_channels=oc, @@ -31,6 +33,9 @@ class ConvMaxpool(nn.Module): groups=groups, bias=bias) for oc, ks in zip(out_channels, kernel_sizes)]) + + for conv in self.convs: + xavier_uniform_(conv.weight) # weight initialization else: raise Exception( 'Incorrect kernel sizes: should be list, tuple or int') diff --git a/reproduction/chinese_word_seg/cws_train.py b/reproduction/chinese_word_seg/cws_train.py deleted file mode 100644 index b63a9401..00000000 --- a/reproduction/chinese_word_seg/cws_train.py +++ /dev/null @@ -1,114 +0,0 @@ -import sys - 
-sys.path.append("..") - -from fastNLP.loader.config_loader import ConfigLoader, ConfigSection -from fastNLP.core.trainer import SeqLabelTrainer -from fastNLP.loader.dataset_loader import TokenizeDatasetLoader, BaseLoader -from fastNLP.core.preprocess import SeqLabelPreprocess, load_pickle -from fastNLP.saver.model_saver import ModelSaver -from fastNLP.loader.model_loader import ModelLoader -from fastNLP.core.tester import SeqLabelTester -from fastNLP.models.sequence_modeling import SeqLabeling -from fastNLP.core.predictor import Predictor - -data_name = "pku_training.utf8" -cws_data_path = "/home/zyfeng/data/pku_training.utf8" -pickle_path = "./save/" -data_infer_path = "/home/zyfeng/data/pku_test.utf8" - - -def infer(): - # Load infer configuration, the same as test - test_args = ConfigSection() - ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS_test": test_args}) - - # fetch dictionary size and number of labels from pickle files - word2index = load_pickle(pickle_path, "word2id.pkl") - test_args["vocab_size"] = len(word2index) - index2label = load_pickle(pickle_path, "id2class.pkl") - test_args["num_classes"] = len(index2label) - - # Define the same model - model = SeqLabeling(test_args) - - # Dump trained parameters into the model - ModelLoader.load_pytorch(model, "./data_for_tests/saved_model.pkl") - print("model loaded!") - - # Data Loader - raw_data_loader = BaseLoader(data_name, data_infer_path) - infer_data = raw_data_loader.load_lines() - - # Inference interface - infer = Predictor(pickle_path) - results = infer.predict(model, infer_data) - - print(results) - print("Inference finished!") - - -def train_test(): - # Config Loader - train_args = ConfigSection() - test_args = ConfigSection() - ConfigLoader("good_name", "good_path").load_config("./cws.cfg", {"train": train_args, "test": test_args}) - - # Data Loader - loader = TokenizeDatasetLoader(data_name, cws_data_path) - train_data = loader.load_pku() - - # Preprocessor - preprocess = SeqLabelPreprocess() - data_train, data_dev = preprocess.run(train_data, pickle_path=pickle_path, train_dev_split=0.3) - train_args["vocab_size"] = preprocess.vocab_size - train_args["num_classes"] = preprocess.num_classes - - # Trainer - trainer = SeqLabelTrainer(train_args) - - # Model - model = SeqLabeling(train_args) - - # Start training - trainer.train(model, data_train, data_dev) - print("Training finished!") - - # Saver - saver = ModelSaver("./save/saved_model.pkl") - saver.save_pytorch(model) - print("Model saved!") - - # testing with validation set - test(data_dev) - - -def test(test_data): - # Config Loader - train_args = ConfigSection() - ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS": train_args}) - - # Define the same model - model = SeqLabeling(train_args) - - # Dump trained parameters into the model - ModelLoader.load_pytorch(model, "./data_for_tests/saved_model.pkl") - print("model loaded!") - - # Load test configuration - test_args = ConfigSection() - ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS_test": test_args}) - - # Tester - tester = SeqLabelTester(test_args) - - # Start testing - tester.test(model, test_data) - - # print test results - print(tester.show_matrices()) - print("model tested!") - - -if __name__ == "__main__": - train_test() diff --git a/reproduction/chinese_word_segment/cws.cfg b/reproduction/chinese_word_segment/cws.cfg index ab799428..033d3967 100644 --- a/reproduction/chinese_word_segment/cws.cfg +++ 
diff --git a/reproduction/chinese_word_segment/cws.cfg b/reproduction/chinese_word_segment/cws.cfg
index ab799428..033d3967 100644
--- a/reproduction/chinese_word_segment/cws.cfg
+++ b/reproduction/chinese_word_segment/cws.cfg
@@ -31,4 +31,16 @@ pickle_path = "./save/"
 use_crf = true
 use_cuda = true
 rnn_hidden_units = 100
+word_emb_dim = 100
+
+[model]
+save_output = true
+validate_in_training = true
+save_dev_input = false
+save_loss = true
+batch_size = 640
+pickle_path = "./save/"
+use_crf = true
+use_cuda = true
+rnn_hidden_units = 100
 word_emb_dim = 100
\ No newline at end of file
diff --git a/reproduction/chinese_word_segment/run.py b/reproduction/chinese_word_segment/run.py
index 66d01038..d0a22e84 100644
--- a/reproduction/chinese_word_segment/run.py
+++ b/reproduction/chinese_word_segment/run.py
@@ -27,7 +27,7 @@ data_infer_path = os.path.join(datadir, "infer.utf8")
 def infer():
     # Config Loader
     test_args = ConfigSection()
-    ConfigLoader("config", "").load_config(cfgfile, {"POS_test": test_args})
+    ConfigLoader("config").load_config(cfgfile, {"POS_test": test_args})
 
     # fetch dictionary size and number of labels from pickle files
     word2index = load_pickle(pickle_path, "word2id.pkl")
@@ -47,7 +47,7 @@ def infer():
         raise
 
     # Data Loader
-    raw_data_loader = BaseLoader(data_name, data_infer_path)
+    raw_data_loader = BaseLoader(data_infer_path)
     infer_data = raw_data_loader.load_lines()
     print('data loaded')
 
@@ -63,10 +63,10 @@ def train():
     # Config Loader
     train_args = ConfigSection()
     test_args = ConfigSection()
-    ConfigLoader("good_name", "good_path").load_config(cfgfile, {"train": train_args, "test": test_args})
+    ConfigLoader("good_path").load_config(cfgfile, {"train": train_args, "test": test_args})
 
     # Data Loader
-    loader = TokenizeDatasetLoader(data_name, cws_data_path)
+    loader = TokenizeDatasetLoader(cws_data_path)
     train_data = loader.load_pku()
 
     # Preprocessor
@@ -100,7 +100,7 @@ def train():
 def test():
     # Config Loader
     test_args = ConfigSection()
-    ConfigLoader("config", "").load_config(cfgfile, {"POS_test": test_args})
+    ConfigLoader("config").load_config(cfgfile, {"POS_test": test_args})
 
     # fetch dictionary size and number of labels from pickle files
     word2index = load_pickle(pickle_path, "word2id.pkl")
@@ -125,7 +125,7 @@ def test():
     tester.test(model, dev_data)
 
     # print test results
-    print(tester.show_matrices())
+    print(tester.show_metrics())
     print("model tested!")
diff --git a/reproduction/chinese_word_seg/cws.cfg b/reproduction/pos_tag_model/pos_tag.cfg
similarity index 59%
rename from reproduction/chinese_word_seg/cws.cfg
rename to reproduction/pos_tag_model/pos_tag.cfg
index cdcb4496..eb5e315d 100644
--- a/reproduction/chinese_word_seg/cws.cfg
+++ b/reproduction/pos_tag_model/pos_tag.cfg
@@ -1,29 +1,35 @@
 [train]
-epochs = 10
-batch_size = 32
+epochs = 30
+batch_size = 64
 pickle_path = "./save/"
 validate = true
 save_best_dev = true
 model_saved_path = "./save/"
 rnn_hidden_units = 100
-rnn_layers = 2
-rnn_bi_direction = true
 word_emb_dim = 100
-dropout = 0.5
 use_crf = true
 use_cuda = true
+print_every_step = 10
 
 [test]
 save_output = true
 validate_in_training = true
 save_dev_input = false
 save_loss = true
-batch_size = 64
+batch_size = 640
+pickle_path = "./save/"
+use_crf = true
+use_cuda = true
+
+
+[POS_test]
+save_output = true
+validate_in_training = true
+save_dev_input = false
+save_loss = true
+batch_size = 640
 pickle_path = "./save/"
-rnn_hidden_units = 100
-rnn_layers = 1
-rnn_bi_direction = true
-word_emb_dim = 100
-dropout = 0.5
 use_crf = true
 use_cuda = true
+rnn_hidden_units = 100
+word_emb_dim = 100
\ No newline at end of file
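The renamed `pos_tag.cfg` keeps the `[section]` / `name = value` layout that `ConfigLoader` maps onto `ConfigSection` objects, so one call can fill several sections at once. A small sketch of reading the `[train]` and `[POS_test]` sections shown above, assuming the file lives at the renamed path and that values are read back through the section's `data` dict:

```python
from fastNLP.loader.config_loader import ConfigLoader, ConfigSection

train_args = ConfigSection()
test_args = ConfigSection()

# One call fills both sections from the file above.
ConfigLoader("pos_tag.cfg").load_config(
    "./reproduction/pos_tag_model/pos_tag.cfg",
    {"train": train_args, "POS_test": test_args})

print(train_args.data["epochs"], train_args.data["batch_size"])  # 30 64
print(test_args.data["batch_size"])                              # 640
```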
diff --git a/reproduction/pos_tag_model/train_pos_tag.py b/reproduction/pos_tag_model/train_pos_tag.py
new file mode 100644
index 00000000..87a9f7e8
--- /dev/null
+++ b/reproduction/pos_tag_model/train_pos_tag.py
@@ -0,0 +1,146 @@
+import os
+import sys
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '../..'))
+
+from fastNLP.loader.config_loader import ConfigLoader, ConfigSection
+from fastNLP.core.trainer import SeqLabelTrainer
+from fastNLP.loader.dataset_loader import PeopleDailyCorpusLoader, BaseLoader
+from fastNLP.core.preprocess import SeqLabelPreprocess, load_pickle
+from fastNLP.saver.model_saver import ModelSaver
+from fastNLP.loader.model_loader import ModelLoader
+from fastNLP.core.tester import SeqLabelTester
+from fastNLP.models.sequence_modeling import AdvSeqLabel
+from fastNLP.core.predictor import SeqLabelInfer
+
+# not in the file's dir
+if len(os.path.dirname(__file__)) != 0:
+    os.chdir(os.path.dirname(__file__))
+datadir = "/home/zyfeng/data/"
+cfgfile = './pos_tag.cfg'
+data_name = "CWS_POS_TAG_NER_people_daily.txt"
+
+pos_tag_data_path = os.path.join(datadir, data_name)
+pickle_path = "save"
+data_infer_path = os.path.join(datadir, "infer.utf8")
+
+
+def infer():
+    # Config Loader
+    test_args = ConfigSection()
+    ConfigLoader("config").load_config(cfgfile, {"POS_test": test_args})
+
+    # fetch dictionary size and number of labels from pickle files
+    word2index = load_pickle(pickle_path, "word2id.pkl")
+    test_args["vocab_size"] = len(word2index)
+    index2label = load_pickle(pickle_path, "id2class.pkl")
+    test_args["num_classes"] = len(index2label)
+
+    # Define the same model
+    model = AdvSeqLabel(test_args)
+
+    try:
+        ModelLoader.load_pytorch(model, "./save/saved_model.pkl")
+        print('model loaded!')
+    except Exception as e:
+        print('cannot load model!')
+        raise
+
+    # Data Loader
+    raw_data_loader = BaseLoader(data_infer_path)
+    infer_data = raw_data_loader.load_lines()
+    print('data loaded')
+
+    # Inference interface
+    infer = SeqLabelInfer(pickle_path)
+    results = infer.predict(model, infer_data)
+
+    print(results)
+    print("Inference finished!")
+
+
+def train():
+    # Config Loader
+    train_args = ConfigSection()
+    test_args = ConfigSection()
+    ConfigLoader("good_name").load_config(cfgfile, {"train": train_args, "test": test_args})
+
+    # Data Loader
+    loader = PeopleDailyCorpusLoader(pos_tag_data_path)
+    train_data, _ = loader.load()
+
+    # Preprocessor
+    preprocessor = SeqLabelPreprocess()
+    data_train, data_dev = preprocessor.run(train_data, pickle_path=pickle_path, train_dev_split=0.3)
+    train_args["vocab_size"] = preprocessor.vocab_size
+    train_args["num_classes"] = preprocessor.num_classes
+
+    # Trainer
+    trainer = SeqLabelTrainer(**train_args.data)
+
+    # Model
+    model = AdvSeqLabel(train_args)
+    try:
+        ModelLoader.load_pytorch(model, "./save/saved_model.pkl")
+        print('model parameter loaded!')
+    except Exception as e:
Continue.") + pass + + # Start training + trainer.train(model, data_train, data_dev) + print("Training finished!") + + # Saver + saver = ModelSaver("./save/saved_model.pkl") + saver.save_pytorch(model) + print("Model saved!") + + +def test(): + # Config Loader + test_args = ConfigSection() + ConfigLoader("config").load_config(cfgfile, {"POS_test": test_args}) + + # fetch dictionary size and number of labels from pickle files + word2index = load_pickle(pickle_path, "word2id.pkl") + test_args["vocab_size"] = len(word2index) + index2label = load_pickle(pickle_path, "id2class.pkl") + test_args["num_classes"] = len(index2label) + + # load dev data + dev_data = load_pickle(pickle_path, "data_dev.pkl") + + # Define the same model + model = AdvSeqLabel(test_args) + + # Dump trained parameters into the model + ModelLoader.load_pytorch(model, "./save/saved_model.pkl") + print("model loaded!") + + # Tester + tester = SeqLabelTester(**test_args.data) + + # Start testing + tester.test(model, dev_data) + + # print test results + print(tester.show_metrics()) + print("model tested!") + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description='Run a chinese word segmentation model') + parser.add_argument('--mode', help='set the model\'s model', choices=['train', 'test', 'infer']) + args = parser.parse_args() + if args.mode == 'train': + train() + elif args.mode == 'test': + test() + elif args.mode == 'infer': + infer() + else: + print('no mode specified for model!') + parser.print_help() diff --git a/requirements.txt b/requirements.txt index d961dd92..954dd741 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ numpy>=1.14.2 torch==0.4.0 torchvision>=0.1.8 +tensorboardX diff --git a/setup.py b/setup.py index e69de29b..25a645c5 100644 --- a/setup.py +++ b/setup.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python +# coding=utf-8 +from setuptools import setup, find_packages + +with open('README.md') as f: + readme = f.read() + +with open('LICENSE') as f: + license = f.read() + +with open('requirements.txt') as f: + reqs = f.read() + +setup( + name='fastNLP', + version='0.0.1', + description='fastNLP: Deep Learning Toolkit for NLP, developed by Fudan FastNLP Team', + long_description=readme, + license=license, + author='fudanNLP', + python_requires='>=3.5', + packages=find_packages(), + install_requires=reqs.strip().split('\n'), +) diff --git a/test/core/test_action.py b/test/core/test_action.py index 6ad1bd29..8d0f628b 100644 --- a/test/core/test_action.py +++ b/test/core/test_action.py @@ -1,9 +1,8 @@ -import os - import unittest from fastNLP.core.action import Action, Batchifier, SequentialSampler + class TestAction(unittest.TestCase): def test_case_1(self): x = [1, 2, 3, 4, 5, 6, 7, 8] diff --git a/test/loader/test_loader.py b/test/loader/test_loader.py index fe826a6f..d2f22166 100644 --- a/test/loader/test_loader.py +++ b/test/loader/test_loader.py @@ -1,13 +1,12 @@ -import os import configparser - import json +import os import unittest - from fastNLP.loader.config_loader import ConfigSection, ConfigLoader from fastNLP.loader.dataset_loader import TokenizeDatasetLoader, POSDatasetLoader, LMDatasetLoader + class TestConfigLoader(unittest.TestCase): def test_case_ConfigLoader(self): @@ -33,8 +32,8 @@ class TestConfigLoader(unittest.TestCase): return dict test_arg = ConfigSection() - ConfigLoader("config", "").load_config(os.path.join("./test/loader", "config"), {"test": test_arg}) - #ConfigLoader("config", 
"").load_config("/home/ygxu/github/fastNLP_testing/fastNLP/test/loader/config", + ConfigLoader("config").load_config(os.path.join("./test/loader", "config"), {"test": test_arg}) + # ConfigLoader("config").load_config("/home/ygxu/github/fastNLP_testing/fastNLP/test/loader/config", # {"test": test_arg}) #dict = read_section_from_config("/home/ygxu/github/fastNLP_testing/fastNLP/test/loader/config", "test") @@ -58,18 +57,18 @@ class TestConfigLoader(unittest.TestCase): class TestDatasetLoader(unittest.TestCase): def test_case_TokenizeDatasetLoader(self): - loader = TokenizeDatasetLoader("cws_pku_utf_8", "./test/data_for_tests/cws_pku_utf_8") + loader = TokenizeDatasetLoader("./test/data_for_tests/cws_pku_utf_8") data = loader.load_pku(max_seq_len=32) print("pass TokenizeDatasetLoader test!") def test_case_POSDatasetLoader(self): - loader = POSDatasetLoader("people", "./test/data_for_tests/people.txt") + loader = POSDatasetLoader("./test/data_for_tests/people.txt") data = loader.load() datas = loader.load_lines() print("pass POSDatasetLoader test!") def test_case_LMDatasetLoader(self): - loader = LMDatasetLoader("cws_pku_utf_8", "./test/data_for_tests/cws_pku_utf_8") + loader = LMDatasetLoader("./test/data_for_tests/cws_pku_utf_8") data = loader.load() datas = loader.load_lines() print("pass TokenizeDatasetLoader test!") diff --git a/test/ner.py b/test/ner.py deleted file mode 100644 index 150bd8c7..00000000 --- a/test/ner.py +++ /dev/null @@ -1,138 +0,0 @@ -import _pickle -import os - -import numpy as np -import torch - -from fastNLP.core.preprocess import SeqLabelPreprocess -from fastNLP.core.tester import SeqLabelTester -from fastNLP.core.trainer import SeqLabelTrainer -from fastNLP.models.sequence_modeling import AdvSeqLabel - - -class MyNERTrainer(SeqLabelTrainer): - def __init__(self, train_args): - super(MyNERTrainer, self).__init__(train_args) - self.scheduler = None - - def define_optimizer(self): - """ - override - :return: - """ - self.optimizer = torch.optim.Adam(self._model.parameters(), lr=0.001) - self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=3000, gamma=0.5) - - def update(self): - """ - override - :return: - """ - self.optimizer.step() - self.scheduler.step() - - def _create_validator(self, valid_args): - return MyNERTester(valid_args) - - def best_eval_result(self, validator): - accuracy = validator.metrics() - if accuracy > self.best_accuracy: - self.best_accuracy = accuracy - return True - else: - return False - - -class MyNERTester(SeqLabelTester): - def __init__(self, test_args): - super(MyNERTester, self).__init__(test_args) - - def _evaluate(self, prediction, batch_y, seq_len): - """ - :param prediction: [batch_size, seq_len, num_classes] - :param batch_y: [batch_size, seq_len] - :param seq_len: [batch_size] - :return: - """ - summ = 0 - correct = 0 - _, indices = torch.max(prediction, 2) - for p, y, l in zip(indices, batch_y, seq_len): - summ += l - correct += np.sum(p[:l].cpu().numpy() == y[:l].cpu().numpy()) - return float(correct / summ) - - def evaluate(self, predict, truth): - return self._evaluate(predict, truth, self.seq_len) - - def metrics(self): - return np.mean(self.eval_history) - - def show_matrices(self): - return "dev accuracy={:.2f}".format(float(self.metrics())) - - -def embedding_process(emb_file, word_dict, emb_dim, emb_pkl): - if os.path.exists(emb_pkl): - with open(emb_pkl, "rb") as f: - embedding_np = _pickle.load(f) - return embedding_np - with open(emb_file, "r", encoding="utf-8") as f: - embedding_np = 
-        embedding_np = np.random.uniform(-1, 1, size=(len(word_dict), emb_dim))
-        for line in f:
-            line = line.strip().split()
-            if len(line) != emb_dim + 1:
-                continue
-            if line[0] in word_dict:
-                embedding_np[word_dict[line[0]]] = [float(i) for i in line[1:]]
-    with open(emb_pkl, "wb") as f:
-        _pickle.dump(embedding_np, f)
-    return embedding_np
-
-
-def data_load(data_file):
-    with open(data_file, "r", encoding="utf-8") as f:
-        all_data = []
-        sent = []
-        label = []
-        for line in f:
-            line = line.strip().split()
-
-            if not len(line) <= 1:
-                sent.append(line[0])
-                label.append(line[1])
-            else:
-                all_data.append([sent, label])
-                sent = []
-                label = []
-    return all_data
-
-
-data_path = "data_for_tests/people.txt"
-pick_path = "data_for_tests/"
-emb_path = "data_for_tests/emb50.txt"
-save_path = "data_for_tests/"
-if __name__ == "__main__":
-    data = data_load(data_path)
-    preprocess = SeqLabelPreprocess()
-    data_train, data_dev = preprocess.run(data, pickle_path=pick_path, train_dev_split=0.3)
-    # emb = embedding_process(emb_path, p.word2index, 50, os.path.join(pick_path, "embedding.pkl"))
-    emb = None
-    args = {"epochs": 20,
-            "batch_size": 1,
-            "pickle_path": pick_path,
-            "validate": True,
-            "save_best_dev": True,
-            "model_saved_path": save_path,
-            "use_cuda": True,
-
-            "vocab_size": preprocess.vocab_size,
-            "num_classes": preprocess.num_classes,
-            "word_emb_dim": 50,
-            "rnn_hidden_units": 100
-            }
-    # emb = torch.Tensor(emb).float().cuda()
-    networks = AdvSeqLabel(args, emb)
-    trainer = MyNERTrainer(args)
-    trainer.train(networks, data_train, data_dev)
-    print("Training finished!")
diff --git a/test/ner_decode.py b/test/ner_decode.py
deleted file mode 100644
index 5c09cbd2..00000000
--- a/test/ner_decode.py
+++ /dev/null
@@ -1,129 +0,0 @@
-import _pickle
-import os
-
-import torch
-
-from fastNLP.core.predictor import SeqLabelInfer
-from fastNLP.core.trainer import SeqLabelTrainer
-from fastNLP.loader.model_loader import ModelLoader
-from fastNLP.models.sequence_modeling import AdvSeqLabel
-
-
-class Decode(SeqLabelTrainer):
-    def __init__(self, args):
-        super(Decode, self).__init__(args)
-
-    def decoder(self, network, sents, model_path):
-        self.model = network
-        self.model.load_state_dict(torch.load(model_path))
-        out_put = []
-        self.mode(network, test=True)
-        for batch_x in sents:
-            prediction = self.data_forward(self.model, batch_x)
-
-            seq_tag = self.model.prediction(prediction, batch_x[1])
-
-            out_put.append(list(seq_tag)[0])
-        return out_put
-
-
-def process_sent(sents, word2id):
-    sents_num = []
-    for s in sents:
-        sent_num = []
-        for c in s:
-            if c in word2id:
-                sent_num.append(word2id[c])
-            else:
-                sent_num.append(word2id[""])
-        sents_num.append(([sent_num], [len(sent_num)]))  # batch_size is 1
-
-    return sents_num
-
-
-def process_tag(sents, tags, id2class):
-    Tags = []
-    for ttt in tags:
-        Tags.append([id2class[t] for t in ttt])
-
-    Segs = []
-    PosNers = []
-    for sent, tag in zip(sents, tags):
-        word__ = []
-        lll__ = []
-        for c, t in zip(sent, tag):
-
-            t = id2class[t]
-            l = t.split("-")
-            split_ = l[0]
-            pn = l[1]
-
-            if split_ == "S":
-                word__.append(c)
-                lll__.append(pn)
-                word_1 = ""
-            elif split_ == "E":
-                word_1 += c
-                word__.append(word_1)
-                lll__.append(pn)
-                word_1 = ""
-            elif split_ == "B":
-                word_1 = ""
-                word_1 += c
-            else:
-                word_1 += c
-        Segs.append(word__)
-        PosNers.append(lll__)
-    return Segs, PosNers
-
-
-pickle_path = "data_for_tests/"
-model_path = "data_for_tests/model_best_dev.pkl"
-if __name__ == "__main__":
-
-    with open(os.path.join(pickle_path, "id2word.pkl"), "rb") as f:
-        id2word = _pickle.load(f)
-    with open(os.path.join(pickle_path, "word2id.pkl"), "rb") as f:
-        word2id = _pickle.load(f)
-    with open(os.path.join(pickle_path, "id2class.pkl"), "rb") as f:
-        id2class = _pickle.load(f)
-
-    sent = ["中共中央总书记、国家主席江泽民",
-            "逆向处理输入序列并返回逆序后的序列"]  # here is input
-
-    args = {"epochs": 1,
-            "batch_size": 1,
-            "pickle_path": "data_for_tests/",
-            "validate": True,
-            "save_best_dev": True,
-            "model_saved_path": "data_for_tests/",
-            "use_cuda": False,
-
-            "vocab_size": len(word2id),
-            "num_classes": len(id2class),
-            "word_emb_dim": 50,
-            "rnn_hidden_units": 100,
-            }
-    """
-    network = AdvSeqLabel(args, None)
-    decoder_ = Decode(args)
-    tags_num = decoder_.decoder(network, process_sent(sent, word2id), model_path=model_path)
-    output_seg, output_pn = process_tag(sent, tags_num, id2class)  # here is output
-    print(output_seg)
-    print(output_pn)
-    """
-    # Define the same model
-    model = AdvSeqLabel(args, None)
-
-    # Dump trained parameters into the model
-    ModelLoader.load_pytorch(model, "./data_for_tests/model_best_dev.pkl")
-    print("model loaded!")
-
-    # Inference interface
-    infer = SeqLabelInfer(pickle_path)
-    sent = [[ch for ch in s] for s in sent]
-    results = infer.predict(model, sent)
-
-    for res in results:
-        print(res)
-    print("Inference finished!")
diff --git a/test/readme_example.py b/test/readme_example.py
index 17ac92c2..bc50c48b 100644
--- a/test/readme_example.py
+++ b/test/readme_example.py
@@ -1,19 +1,13 @@
-# python: 3.5
-# pytorch: 0.4
-
-################
-# Test cross validation.
-################
-
-from fastNLP.loader.preprocess import ClassPreprocess
-
+from fastNLP.core.loss import Loss
+from fastNLP.core.optimizer import Optimizer
 from fastNLP.core.predictor import ClassificationInfer
+from fastNLP.core.preprocess import ClassPreprocess
 from fastNLP.core.trainer import ClassificationTrainer
 from fastNLP.loader.dataset_loader import ClassDatasetLoader
 from fastNLP.models.base_model import BaseModel
 from fastNLP.modules import aggregation
-from fastNLP.modules import encoder
 from fastNLP.modules import decoder
+from fastNLP.modules import encoder
 
 
 class ClassificationModel(BaseModel):
@@ -28,7 +22,7 @@ class ClassificationModel(BaseModel):
         self.enc = encoder.Conv(
             in_channels=300, out_channels=100, kernel_size=3)
         self.agg = aggregation.MaxPool()
-        self.dec = decoder.MLP(100, num_classes=num_classes)
+        self.dec = decoder.MLP(size_layer=[100, num_classes])
 
     def forward(self, x):
         x = self.emb(x)  # [N,L] -> [N,L,C]
@@ -38,18 +32,17 @@ class ClassificationModel(BaseModel):
         return x
 
 
-data_dir = 'data'  # directory to save data and model
-train_path = 'test/data_for_tests/text_classify.txt'  # training set file
+data_dir = 'save/'  # directory to save data and model
+train_path = './data_for_tests/text_classify.txt'  # training set file
 
 # load dataset
-ds_loader = ClassDatasetLoader("train", train_path)
+ds_loader = ClassDatasetLoader(train_path)
 data = ds_loader.load()
 
 # pre-process dataset
-pre = ClassPreprocess(data, data_dir, cross_val=True, n_fold=5)
-# pre = ClassPreprocess(data, data_dir)
-n_classes = pre.num_classes
-vocab_size = pre.vocab_size
+pre = ClassPreprocess()
+train_set, dev_set = pre.run(data, train_dev_split=0.3, pickle_path=data_dir)
+n_classes, vocab_size = pre.num_classes, pre.vocab_size
 
 # construct model
 model_args = {
@@ -58,22 +51,25 @@ model_args = {
 }
 model = ClassificationModel(num_classes=n_classes, vocab_size=vocab_size)
 
-# train model
+# construct trainer
 train_args = {
-    "epochs": 10,
-    "batch_size": 50,
+    "epochs": 3,
+    "batch_size": 16,
     "pickle_path": data_dir,
     "validate": False,
     "save_best_dev": False,
     "model_saved_path": None,
     "use_cuda": True,
-    "learn_rate": 1e-3,
-    "momentum": 0.9}
-trainer = ClassificationTrainer(train_args)
+    "loss": Loss("cross_entropy"),
+    "optimizer": Optimizer("Adam", lr=0.001)
+}
+trainer = ClassificationTrainer(**train_args)
 
-# trainer.train(model, ['data_train.pkl', 'data_dev.pkl'])
-trainer.cross_validate(model)
+# start training
+trainer.train(model, train_data=train_set, dev_data=dev_set)
 
 # predict using model
 data_infer = [x[0] for x in data]
 infer = ClassificationInfer(data_dir)
-labels_pred = infer.predict(model, data_infer)
\ No newline at end of file
+labels_pred = infer.predict(model.cpu(), data_infer)
+print(labels_pred)
diff --git a/test/seq_labeling.py b/test/seq_labeling.py
index a9488834..0f7a072b 100644
--- a/test/seq_labeling.py
+++ b/test/seq_labeling.py
@@ -33,7 +33,7 @@ data_infer_path = args.infer
 def infer():
     # Load infer configuration, the same as test
     test_args = ConfigSection()
-    ConfigLoader("config.cfg", "").load_config(config_dir, {"POS_infer": test_args})
+    ConfigLoader("config.cfg").load_config(config_dir, {"POS_infer": test_args})
 
     # fetch dictionary size and number of labels from pickle files
     word2index = load_pickle(pickle_path, "word2id.pkl")
@@ -49,7 +49,7 @@ def infer():
     print("model loaded!")
 
     # Data Loader
-    raw_data_loader = BaseLoader("xxx", data_infer_path)
+    raw_data_loader = BaseLoader(data_infer_path)
     infer_data = raw_data_loader.load_lines()
 
     # Inference interface
@@ -65,11 +65,11 @@ def train_and_test():
     # Config Loader
     trainer_args = ConfigSection()
     model_args = ConfigSection()
-    ConfigLoader("config.cfg", "").load_config(config_dir, {
+    ConfigLoader("config.cfg").load_config(config_dir, {
         "test_seq_label_trainer": trainer_args, "test_seq_label_model": model_args})
 
     # Data Loader
-    pos_loader = POSDatasetLoader("xxx", data_path)
+    pos_loader = POSDatasetLoader(data_path)
     train_data = pos_loader.load_lines()
 
     # Preprocessor
@@ -117,7 +117,7 @@ def train_and_test():
 
     # Load test configuration
     tester_args = ConfigSection()
-    ConfigLoader("config.cfg", "").load_config(config_dir, {"test_seq_label_tester": tester_args})
+    ConfigLoader("config.cfg").load_config(config_dir, {"test_seq_label_tester": tester_args})
 
     # Tester
     tester = SeqLabelTester(save_output=False,
@@ -134,10 +134,10 @@ def train_and_test():
     tester.test(model, data_dev)
 
     # print test results
-    print(tester.show_matrices())
+    print(tester.show_metrics())
     print("model tested!")
 
 
 if __name__ == "__main__":
-    train_and_test()
-    # infer()
+    # train_and_test()
+    infer()
diff --git a/test/test_cws.py b/test/test_cws.py
index bbbef67f..802d97ba 100644
--- a/test/test_cws.py
+++ b/test/test_cws.py
@@ -22,7 +22,7 @@ data_infer_path = "data_for_tests/people_infer.txt"
 def infer():
     # Load infer configuration, the same as test
     test_args = ConfigSection()
-    ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS_test": test_args})
+    ConfigLoader("config.cfg").load_config("./data_for_tests/config", {"POS_test": test_args})
 
     # fetch dictionary size and number of labels from pickle files
     word2index = load_pickle(pickle_path, "word2id.pkl")
@@ -38,7 +38,7 @@ def infer():
     print("model loaded!")
 
     # Data Loader
-    raw_data_loader = BaseLoader(data_name, data_infer_path)
+    raw_data_loader = BaseLoader(data_infer_path)
     infer_data = raw_data_loader.load_lines()
     """
         Transform strings into list of list of strings.
@@ -61,10 +61,10 @@ def infer():
 def train_test():
     # Config Loader
     train_args = ConfigSection()
-    ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS": train_args})
+    ConfigLoader("config.cfg").load_config("./data_for_tests/config", {"POS": train_args})
 
     # Data Loader
-    loader = TokenizeDatasetLoader(data_name, cws_data_path)
+    loader = TokenizeDatasetLoader(cws_data_path)
     train_data = loader.load_pku()
 
     # Preprocessor
@@ -74,7 +74,7 @@ def train_test():
     train_args["num_classes"] = p.num_classes
 
     # Trainer
-    trainer = SeqLabelTrainer(train_args)
+    trainer = SeqLabelTrainer(**train_args.data)
 
     # Model
     model = SeqLabeling(train_args)
@@ -99,16 +99,16 @@ def train_test():
 
     # Load test configuration
     test_args = ConfigSection()
-    ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS_test": test_args})
+    ConfigLoader("config.cfg").load_config("./data_for_tests/config", {"POS_test": test_args})
 
     # Tester
-    tester = SeqLabelTester(test_args)
+    tester = SeqLabelTester(**test_args.data)
 
     # Start testing
     tester.test(model, data_train)
 
     # print test results
-    print(tester.show_matrices())
+    print(tester.show_metrics())
     print("model tested!")
diff --git a/test/test_fastNLP.py b/test/test_fastNLP.py
index 0776109a..92bc894f 100644
--- a/test/test_fastNLP.py
+++ b/test/test_fastNLP.py
@@ -1,9 +1,12 @@
 import sys
+
 sys.path.append("..")
 from fastNLP.fastnlp import FastNLP
-from fastNLP.fastnlp import interpret_word_seg_results
+from fastNLP.fastnlp import interpret_word_seg_results, interpret_cws_pos_results
 
 PATH_TO_CWS_PICKLE_FILES = "/home/zyfeng/fastNLP/reproduction/chinese_word_segment/save/"
+PATH_TO_POS_TAG_PICKLE_FILES = "/home/zyfeng/data/crf_seg/"
+PATH_TO_TEXT_CLASSIFICATION_PICKLE_FILES = "/home/zyfeng/data/text_classify/"
 
 def word_seg():
     nlp = FastNLP(model_dir=PATH_TO_CWS_PICKLE_FILES)
@@ -39,5 +42,44 @@ def test_word_seg_interpret():
     print(interpret_word_seg_results(chars, labels))
 
 
+def test_interpret_cws_pos_results():
+    foo = [
+        [('这', 'S-r'), ('是', 'S-v'), ('最', 'S-d'), ('好', 'S-a'), ('的', 'S-u'), ('基', 'B-p'), ('于', 'E-p'), ('深', 'B-d'),
+         ('度', 'E-d'), ('学', 'B-v'), ('习', 'E-v'), ('的', 'S-u'), ('中', 'B-nz'), ('文', 'E-nz'), ('分', 'B-vn'),
+         ('词', 'E-vn'), ('系', 'B-n'), ('统', 'E-n'), ('。', 'S-w')]
+    ]
+    chars = [x[0] for x in foo[0]]
+    labels = [x[1] for x in foo[0]]
+    print(interpret_cws_pos_results(chars, labels))
+
+
+def pos_tag():
+    nlp = FastNLP(model_dir=PATH_TO_POS_TAG_PICKLE_FILES)
+    nlp.load("pos_tag_model", config_file="pos_tag.config", section_name="pos_tag_model")
+    text = ["这是最好的基于深度学习的中文分词系统。",
+            "大王叫我来巡山。",
+            "我党多年来致力于改善人民生活水平。"]
+    results = nlp.run(text)
+    for example in results:
+        words, labels = [], []
+        for res in example:
+            words.append(res[0])
+            labels.append(res[1])
+        print(interpret_cws_pos_results(words, labels))
+
+
+def text_classify():
+    nlp = FastNLP(model_dir=PATH_TO_TEXT_CLASSIFICATION_PICKLE_FILES)
+    nlp.load("text_classify_model", config_file="text_classify.cfg", section_name="model")
+    text = [
+        "世界物联网大会明日在京召开龙头股启动在即",
+        "乌鲁木齐市新增一处城市中心旅游目的地",
+        "朱元璋的大明朝真的源于明教吗?——告诉你一个真实的“明教”"]
+    results = nlp.run(text)
+    print(results)
+    """
+    ['finance', 'travel', 'history']
+    """
+
+
 if __name__ == "__main__":
-    word_seg()
+    text_classify()
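These hunks also show an API shift that runs through all the tests: `SeqLabelTrainer` and `SeqLabelTester` now take plain keyword arguments rather than a `ConfigSection` object, so call sites unpack the section's dict. A before/after sketch, using the same config names as the tests above:

```python
from fastNLP.core.tester import SeqLabelTester
from fastNLP.core.trainer import SeqLabelTrainer
from fastNLP.loader.config_loader import ConfigLoader, ConfigSection

train_args = ConfigSection()
ConfigLoader("config.cfg").load_config("./data_for_tests/config", {"POS": train_args})

# old style: trainer = SeqLabelTrainer(train_args)
trainer = SeqLabelTrainer(**train_args.data)  # new style: unpack the section's dict

test_args = ConfigSection()
ConfigLoader("config.cfg").load_config("./data_for_tests/config", {"POS_test": test_args})
tester = SeqLabelTester(**test_args.data)
```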
"pku_training.utf8" -cws_data_path = "/home/zyfeng/Desktop/data/pku_training.utf8" pickle_path = "data_for_tests" def foo(): - loader = TokenizeDatasetLoader(data_name, "./data_for_tests/cws_pku_utf_8") + loader = TokenizeDatasetLoader("./data_for_tests/cws_pku_utf_8") train_data = loader.load_pku() train_args = ConfigSection() - ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS": train_args}) + ConfigLoader("config.cfg").load_config("./data_for_tests/config", {"POS": train_args}) # Preprocessor - p = SeqLabelPreprocess(train_data, pickle_path) + p = SeqLabelPreprocess() + train_data = p.run(train_data) train_args["vocab_size"] = p.vocab_size train_args["num_classes"] = p.num_classes @@ -26,11 +26,11 @@ def foo(): valid_args = {"save_output": True, "validate_in_training": True, "save_dev_input": True, "save_loss": True, "batch_size": 8, "pickle_path": "./data_for_tests/", "use_cuda": True} - validator = SeqLabelTester(valid_args) + validator = SeqLabelTester(**valid_args) print("start validation.") - validator.test(model) - print(validator.show_matrices()) + validator.test(model, train_data) + print(validator.show_metrics()) if __name__ == "__main__": diff --git a/test/text_classify.py b/test/text_classify.py index 64294d37..6ff3c059 100644 --- a/test/text_classify.py +++ b/test/text_classify.py @@ -34,7 +34,7 @@ config_dir = args.config def infer(): # load dataset print("Loading data...") - ds_loader = ClassDatasetLoader("train", train_data_dir) + ds_loader = ClassDatasetLoader(train_data_dir) data = ds_loader.load() unlabeled_data = [x[0] for x in data] @@ -69,7 +69,7 @@ def train(): # load dataset print("Loading data...") - ds_loader = ClassDatasetLoader("train", train_data_dir) + ds_loader = ClassDatasetLoader(train_data_dir) data = ds_loader.load() print(data[0])