Merge branch 'master' into test_code

tags/v0.1.0
Yige XU, 6 years ago
commit be2f4aade3
60 changed files with 1203 additions and 11838 deletions
1. README.md  +25 -133
2. docs/requirements.txt  +4 -3
3. docs/source/conf.py  +2 -0
4. docs/source/fastNLP.core.rst  +26 -34
5. docs/source/fastNLP.loader.rst  +12 -38
6. docs/source/fastNLP.models.rst  +10 -26
7. docs/source/fastNLP.modules.aggregation.rst  +12 -30
8. docs/source/fastNLP.modules.decoder.rst  +9 -13
9. docs/source/fastNLP.modules.encoder.rst  +18 -42
10. docs/source/fastNLP.modules.interaction.rst  +2 -7
11. docs/source/fastNLP.modules.rst  +6 -21
12. docs/source/fastNLP.rst  +4 -17
13. docs/source/fastNLP.saver.rst  +6 -18
14. docs/source/figures/procedures_and_sequence_labeling.png  BIN
15. docs/source/figures/text_classification.png  BIN
16. docs/source/index.rst  +47 -9
17. docs/source/modules.rst  +0 -7
18. docs/source/user/installation.rst  +31 -0
19. docs/source/user/quickstart.rst  +84 -0
20. fastNLP/core/action.py  +57 -31
21. fastNLP/core/loss.py  +15 -1
22. fastNLP/core/metrics.py  +26 -31
23. fastNLP/core/optimizer.py  +10 -2
24. fastNLP/core/predictor.py  +1 -1
25. fastNLP/core/preprocess.py  +58 -13
26. fastNLP/core/tester.py  +42 -22
27. fastNLP/core/trainer.py  +61 -99
28. fastNLP/fastnlp.py  +162 -70
29. fastNLP/loader/base_loader.py  +3 -4
30. fastNLP/loader/dataset_loader.py  +62 -18
31. fastNLP/loader/embed_loader.py  +44 -2
32. fastNLP/loader/model_loader.py  +2 -2
33. fastNLP/models/cnn_text_classification.py  +6 -6
34. fastNLP/modules/decoder/__init__.py  +2 -1
35. fastNLP/modules/encoder/__init__.py  +3 -1
36. fastNLP/modules/encoder/conv_maxpool.py  +5 -0
37. fastNLP/modules/encoder/embedding.py  +1 -1
38. fastnlp-architecture.jpg  BIN
39. reproduction/CNN-sentence_classification/rt-polaritydata/rt-polarity.neg  +0 -5331
40. reproduction/CNN-sentence_classification/rt-polaritydata/rt-polarity.pos  +0 -5331
41. reproduction/HAN-document_classification/data/test_samples.pkl  BIN
42. reproduction/HAN-document_classification/data/train_samples.pkl  BIN
43. reproduction/HAN-document_classification/data/yelp.word2vec  BIN
44. reproduction/chinese_word_seg/cws_train.py  +0 -114
45. reproduction/chinese_word_segment/cws.cfg  +12 -0
46. reproduction/chinese_word_segment/run.py  +25 -20
47. reproduction/pos_tag_model/pos_tag.cfg  +17 -11
48. reproduction/pos_tag_model/train_pos_tag.py  +146 -0
49. requirements.txt  +1 -0
50. setup.py  +24 -0
51. test/core/test_action.py  +1 -2
52. test/loader/test_loader.py  +2 -0
53. test/ner.py  +0 -138
54. test/ner_decode.py  +0 -129
55. test/readme_example.py  +23 -27
56. test/seq_labeling.py  +8 -8
57. test/test_cws.py  +8 -8
58. test/test_fastNLP.py  +69 -7
59. test/test_tester.py  +7 -7
60. test/text_classify.py  +2 -2

README.md  +25 -133

@@ -2,6 +2,9 @@

[![Build Status](https://travis-ci.org/fastnlp/fastNLP.svg?branch=master)](https://travis-ci.org/fastnlp/fastNLP)
[![codecov](https://codecov.io/gh/fastnlp/fastNLP/branch/master/graph/badge.svg)](https://codecov.io/gh/fastnlp/fastNLP)
[![PyPI version](https://badge.fury.io/py/fastNLP.svg)](https://badge.fury.io/py/fastNLP)
![Hex.pm](https://img.shields.io/hexpm/l/plug.svg)
[![Documentation Status](https://readthedocs.org/projects/fastnlp/badge/?version=latest)](http://fastnlp.readthedocs.io/?badge=latest)

fastNLP is a modular Natural Language Processing system based on PyTorch, built for fast development of NLP tools. It divides deep-learning-based NLP models into different modules. These modules fall into four categories: encoder, interaction, aggregation, and decoder, and each category contains several implemented modules. Encoder modules encode the input into an abstract representation, interaction modules make the information in the representation interact with each other, aggregation modules aggregate and reduce information, and decoder modules decode the representation into the output. Most current NLP models can be built on these modules, which vastly simplifies the process of developing NLP models. The architecture of fastNLP is shown in the figure below:

@@ -13,93 +16,19 @@ fastNLP is a modular Natural Language Processing system based on PyTorch, for fa
- numpy>=1.14.2
- torch==0.4.0
- torchvision>=0.1.8
- tensorboardX


## Resources

- [Documentation](https://github.com/fastnlp/fastNLP)
- [Documentation](https://fastnlp.readthedocs.io/en/latest/)
- [Source Code](https://github.com/fastnlp/fastNLP)


## Example

### Basic Usage

A typical fastNLP routine is composed of four phases: loading dataset, pre-processing data, constructing model and training model.
```python
from fastNLP.models.base_model import BaseModel
from fastNLP.modules import encoder
from fastNLP.modules import aggregation
from fastNLP.modules import decoder

from fastNLP.loader.dataset_loader import ClassDatasetLoader
from fastNLP.loader.preprocess import ClassPreprocess
from fastNLP.core.trainer import ClassificationTrainer
from fastNLP.core.inference import ClassificationInfer


class ClassificationModel(BaseModel):
"""
Simple text classification model based on CNN.
"""

def __init__(self, num_classes, vocab_size):
super(ClassificationModel, self).__init__()

self.emb = encoder.Embedding(nums=vocab_size, dims=300)
self.enc = encoder.Conv(
in_channels=300, out_channels=100, kernel_size=3)
self.agg = aggregation.MaxPool()
self.dec = decoder.MLP(100, num_classes=num_classes)

def forward(self, x):
x = self.emb(x) # [N,L] -> [N,L,C]
x = self.enc(x) # [N,L,C_in] -> [N,L,C_out]
x = self.agg(x) # [N,L,C] -> [N,C]
x = self.dec(x) # [N,C] -> [N, N_class]
return x


data_dir = 'data' # directory to save data and model
train_path = 'test/data_for_tests/text_classify.txt' # training set file

# load dataset
ds_loader = ClassDatasetLoader("train", train_path)
data = ds_loader.load()

# pre-process dataset
pre = ClassPreprocess(data_dir)
vocab_size, n_classes = pre.process(data, "data_train.pkl")

# construct model
model_args = {
'num_classes': n_classes,
'vocab_size': vocab_size
}
model = ClassificationModel(num_classes=n_classes, vocab_size=vocab_size)

# train model
train_args = {
"epochs": 20,
"batch_size": 50,
"pickle_path": data_dir,
"validate": False,
"save_best_dev": False,
"model_saved_path": None,
"use_cuda": True,
"learn_rate": 1e-3,
"momentum": 0.9}
trainer = ClassificationTrainer(train_args)
trainer.train(model)

# predict using model
seqs = [x[0] for x in data]
infer = ClassificationInfer(data_dir)
labels_pred = infer.predict(model, seqs)
```


## Installation
Run the following commands to install fastNLP package.
```shell
pip install fastNLP
```

### Cloning From GitHub

@@ -119,20 +48,26 @@ conda install pytorch torchvision -c pytorch
pip3 install torch torchvision
```

### TensorboardX Installation

```shell
pip3 install tensorboardX
```

## Project Structure

```
FastNLP
├── docs
│   └── quick_tutorial.md
├── fastNLP
│   ├── action
│   ├── core
│   │   ├── action.py
│   │   ├── inference.py
│   │   ├── __init__.py
│   │   ├── loss.py
│   │   ├── metrics.py
│   │   ├── optimizer.py
│   │   ├── predictor.py
│   │   ├── preprocess.py
│   │   ├── README.md
│   │   ├── tester.py
│   │   └── trainer.py
@@ -144,71 +79,28 @@ FastNLP
│   │   ├── dataset_loader.py
│   │   ├── embed_loader.py
│   │   ├── __init__.py
│   │   ├── model_loader.py
│   │   └── preprocess.py
│   │   └── model_loader.py
│   ├── models
│   │   ├── base_model.py
│   │   ├── char_language_model.py
│   │   ├── cnn_text_classification.py
│   │   ├── __init__.py
│   │   └── sequence_modeling.py
│   ├── modules
│   │   ├── aggregation
│   │   │   ├── attention.py
│   │   │   ├── avg_pool.py
│   │   │   ├── __init__.py
│   │   │   ├── kmax_pool.py
│   │   │   ├── max_pool.py
│   │   │   └── self_attention.py
│   │   ├── decoder
│   │   │   ├── CRF.py
│   │   │   └── __init__.py
│   │   ├── encoder
│   │   │   ├── char_embedding.py
│   │   │   ├── conv_maxpool.py
│   │   │   ├── conv.py
│   │   │   ├── embedding.py
│   │   │   ├── __init__.py
│   │   │   ├── linear.py
│   │   │   ├── lstm.py
│   │   │   ├── masked_rnn.py
│   │   │   └── variational_rnn.py
│   │   ├── __init__.py
│   │   ├── interaction
│   │   │   └── __init__.py
│   │   ├── other_modules.py
│   │   └── utils.py
│   └── saver
│   ├── base_saver.py
│   ├── __init__.py
│   ├── logger.py
│   └── model_saver.py
├── LICENSE
├── README.md
├── reproduction
│   ├── Char-aware_NLM
│   │  
│   ├── CNN-sentence_classification
│   │  
│   ├── HAN-document_classification
│   │  
│   └── LSTM+self_attention_sentiment_analysis
|
├── requirements.txt
├── setup.py
└── test
├── core
├── data_for_tests
│   ├── charlm.txt
│   ├── config
│   ├── cws_test
│   ├── cws_train
│   ├── people_infer.txt
│   └── people.txt
├── test_charlm.py
├── test_cws.py
├── test_fastNLP.py
├── test_loader.py
├── test_seq_labeling.py
├── test_tester.py
└── test_trainer.py
├── __init__.py
├── loader
├── modules
└── readme_example.py

```

docs/requirements.txt  +4 -3

@@ -1,3 +1,4 @@
sphinx
-e git://github.com/snide/sphinx_rtd_theme.git#egg=sphinx_rtd_theme
sphinxcontrib.katex
numpy>=1.14.2
http://download.pytorch.org/whl/cpu/torch-0.4.1-cp35-cp35m-linux_x86_64.whl
torchvision>=0.1.8
sphinx-rtd-theme==0.4.1

docs/source/conf.py  +2 -0

@@ -42,6 +42,8 @@ release = '1.0'
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.viewcode',
'sphinx.ext.autosummary',

]

# Add any paths that contain templates here, relative to this directory.


docs/source/fastNLP.core.rst  +26 -34

@@ -1,62 +1,54 @@
fastNLP.core package
====================
fastNLP.core
=============

Submodules
----------

fastNLP.core.action module
--------------------------
fastNLP.core.action
--------------------

.. automodule:: fastNLP.core.action
:members:
:undoc-members:
:show-inheritance:

fastNLP.core.metrics module
---------------------------
fastNLP.core.loss
------------------

.. automodule:: fastNLP.core.loss
:members:

fastNLP.core.metrics
---------------------

.. automodule:: fastNLP.core.metrics
:members:
:undoc-members:
:show-inheritance:

fastNLP.core.optimizer module
-----------------------------
fastNLP.core.optimizer
-----------------------

.. automodule:: fastNLP.core.optimizer
:members:
:undoc-members:
:show-inheritance:

fastNLP.core.predictor module
-----------------------------
fastNLP.core.predictor
-----------------------

.. automodule:: fastNLP.core.predictor
:members:
:undoc-members:
:show-inheritance:

fastNLP.core.tester module
--------------------------
fastNLP.core.preprocess
------------------------

.. automodule:: fastNLP.core.preprocess
:members:

fastNLP.core.tester
--------------------

.. automodule:: fastNLP.core.tester
:members:
:undoc-members:
:show-inheritance:

fastNLP.core.trainer module
---------------------------
fastNLP.core.trainer
---------------------

.. automodule:: fastNLP.core.trainer
:members:
:undoc-members:
:show-inheritance:


Module contents
---------------

.. automodule:: fastNLP.core
:members:
:undoc-members:
:show-inheritance:

docs/source/fastNLP.loader.rst  +12 -38

@@ -1,62 +1,36 @@
fastNLP.loader package
======================
fastNLP.loader
===============

Submodules
----------

fastNLP.loader.base\_loader module
----------------------------------
fastNLP.loader.base\_loader
----------------------------

.. automodule:: fastNLP.loader.base_loader
:members:
:undoc-members:
:show-inheritance:

fastNLP.loader.config\_loader module
------------------------------------
fastNLP.loader.config\_loader
------------------------------

.. automodule:: fastNLP.loader.config_loader
:members:
:undoc-members:
:show-inheritance:

fastNLP.loader.dataset\_loader module
-------------------------------------
fastNLP.loader.dataset\_loader
-------------------------------

.. automodule:: fastNLP.loader.dataset_loader
:members:
:undoc-members:
:show-inheritance:

fastNLP.loader.embed\_loader module
-----------------------------------
fastNLP.loader.embed\_loader
-----------------------------

.. automodule:: fastNLP.loader.embed_loader
:members:
:undoc-members:
:show-inheritance:

fastNLP.loader.model\_loader module
-----------------------------------
fastNLP.loader.model\_loader
-----------------------------

.. automodule:: fastNLP.loader.model_loader
:members:
:undoc-members:
:show-inheritance:

fastNLP.loader.preprocess module
--------------------------------

.. automodule:: fastNLP.loader.preprocess
:members:
:undoc-members:
:show-inheritance:


Module contents
---------------

.. automodule:: fastNLP.loader
:members:
:undoc-members:
:show-inheritance:

docs/source/fastNLP.models.rst  +10 -26

@@ -1,46 +1,30 @@
fastNLP.models package
======================
fastNLP.models
===============

Submodules
----------

fastNLP.models.base\_model module
---------------------------------
fastNLP.models.base\_model
---------------------------

.. automodule:: fastNLP.models.base_model
:members:
:undoc-members:
:show-inheritance:

fastNLP.models.char\_language\_model module
-------------------------------------------
fastNLP.models.char\_language\_model
-------------------------------------

.. automodule:: fastNLP.models.char_language_model
:members:
:undoc-members:
:show-inheritance:

fastNLP.models.cnn\_text\_classification module
-----------------------------------------------
fastNLP.models.cnn\_text\_classification
-----------------------------------------

.. automodule:: fastNLP.models.cnn_text_classification
:members:
:undoc-members:
:show-inheritance:

fastNLP.models.sequence\_modeling module
----------------------------------------
fastNLP.models.sequence\_modeling
----------------------------------

.. automodule:: fastNLP.models.sequence_modeling
:members:
:undoc-members:
:show-inheritance:


Module contents
---------------

.. automodule:: fastNLP.models
:members:
:undoc-members:
:show-inheritance:

docs/source/fastNLP.modules.aggregation.rst  +12 -30

@@ -1,54 +1,36 @@
fastNLP.modules.aggregation package
===================================
fastNLP.modules.aggregation
============================

Submodules
----------

fastNLP.modules.aggregation.attention module
--------------------------------------------
fastNLP.modules.aggregation.attention
--------------------------------------

.. automodule:: fastNLP.modules.aggregation.attention
:members:
:undoc-members:
:show-inheritance:

fastNLP.modules.aggregation.avg\_pool module
--------------------------------------------
fastNLP.modules.aggregation.avg\_pool
--------------------------------------

.. automodule:: fastNLP.modules.aggregation.avg_pool
:members:
:undoc-members:
:show-inheritance:

fastNLP.modules.aggregation.kmax\_pool module
---------------------------------------------
fastNLP.modules.aggregation.kmax\_pool
---------------------------------------

.. automodule:: fastNLP.modules.aggregation.kmax_pool
:members:
:undoc-members:
:show-inheritance:

fastNLP.modules.aggregation.max\_pool module
--------------------------------------------
fastNLP.modules.aggregation.max\_pool
--------------------------------------

.. automodule:: fastNLP.modules.aggregation.max_pool
:members:
:undoc-members:
:show-inheritance:

fastNLP.modules.aggregation.self\_attention module
--------------------------------------------------
fastNLP.modules.aggregation.self\_attention
--------------------------------------------

.. automodule:: fastNLP.modules.aggregation.self_attention
:members:
:undoc-members:
:show-inheritance:


Module contents
---------------

.. automodule:: fastNLP.modules.aggregation
:members:
:undoc-members:
:show-inheritance:

docs/source/fastNLP.modules.decoder.rst  +9 -13

@@ -1,22 +1,18 @@
fastNLP.modules.decoder package
===============================
fastNLP.modules.decoder
========================

Submodules
----------

fastNLP.modules.decoder.CRF module
----------------------------------
fastNLP.modules.decoder.CRF
----------------------------

.. automodule:: fastNLP.modules.decoder.CRF
:members:
:undoc-members:
:show-inheritance:

fastNLP.modules.decoder.MLP
----------------------------

.. automodule:: fastNLP.modules.decoder.MLP
:members:

Module contents
---------------

.. automodule:: fastNLP.modules.decoder
:members:
:undoc-members:
:show-inheritance:

docs/source/fastNLP.modules.encoder.rst  +18 -42

@@ -1,78 +1,54 @@
fastNLP.modules.encoder package
===============================
fastNLP.modules.encoder
========================

Submodules
----------

fastNLP.modules.encoder.char\_embedding module
----------------------------------------------
fastNLP.modules.encoder.char\_embedding
----------------------------------------

.. automodule:: fastNLP.modules.encoder.char_embedding
:members:
:undoc-members:
:show-inheritance:

fastNLP.modules.encoder.conv module
-----------------------------------
fastNLP.modules.encoder.conv
-----------------------------

.. automodule:: fastNLP.modules.encoder.conv
:members:
:undoc-members:
:show-inheritance:

fastNLP.modules.encoder.conv\_maxpool module
--------------------------------------------
fastNLP.modules.encoder.conv\_maxpool
--------------------------------------

.. automodule:: fastNLP.modules.encoder.conv_maxpool
:members:
:undoc-members:
:show-inheritance:

fastNLP.modules.encoder.embedding module
----------------------------------------
fastNLP.modules.encoder.embedding
----------------------------------

.. automodule:: fastNLP.modules.encoder.embedding
:members:
:undoc-members:
:show-inheritance:

fastNLP.modules.encoder.linear module
-------------------------------------
fastNLP.modules.encoder.linear
-------------------------------

.. automodule:: fastNLP.modules.encoder.linear
:members:
:undoc-members:
:show-inheritance:

fastNLP.modules.encoder.lstm module
-----------------------------------
fastNLP.modules.encoder.lstm
-----------------------------

.. automodule:: fastNLP.modules.encoder.lstm
:members:
:undoc-members:
:show-inheritance:

fastNLP.modules.encoder.masked\_rnn module
------------------------------------------
fastNLP.modules.encoder.masked\_rnn
------------------------------------

.. automodule:: fastNLP.modules.encoder.masked_rnn
:members:
:undoc-members:
:show-inheritance:

fastNLP.modules.encoder.variational\_rnn module
-----------------------------------------------
fastNLP.modules.encoder.variational\_rnn
-----------------------------------------

.. automodule:: fastNLP.modules.encoder.variational_rnn
:members:
:undoc-members:
:show-inheritance:


Module contents
---------------

.. automodule:: fastNLP.modules.encoder
:members:
:undoc-members:
:show-inheritance:

docs/source/fastNLP.modules.interaction.rst  +2 -7

@@ -1,10 +1,5 @@
fastNLP.modules.interaction package
===================================

Module contents
---------------
fastNLP.modules.interaction
============================

.. automodule:: fastNLP.modules.interaction
:members:
:undoc-members:
:show-inheritance:

docs/source/fastNLP.modules.rst  +6 -21

@@ -1,8 +1,5 @@
fastNLP.modules package
=======================

Subpackages
-----------
fastNLP.modules
================

.. toctree::

@@ -11,30 +8,18 @@ Subpackages
fastNLP.modules.encoder
fastNLP.modules.interaction

Submodules
----------

fastNLP.modules.other\_modules module
-------------------------------------
fastNLP.modules.other\_modules
-------------------------------

.. automodule:: fastNLP.modules.other_modules
:members:
:undoc-members:
:show-inheritance:

fastNLP.modules.utils module
----------------------------
fastNLP.modules.utils
----------------------

.. automodule:: fastNLP.modules.utils
:members:
:undoc-members:
:show-inheritance:


Module contents
---------------

.. automodule:: fastNLP.modules
:members:
:undoc-members:
:show-inheritance:

docs/source/fastNLP.rst  +4 -17

@@ -1,8 +1,5 @@
fastNLP package
===============

Subpackages
-----------
fastNLP
========

.. toctree::

@@ -12,22 +9,12 @@ Subpackages
fastNLP.modules
fastNLP.saver

Submodules
----------

fastNLP.fastnlp module
----------------------
fastNLP.fastnlp
----------------

.. automodule:: fastNLP.fastnlp
:members:
:undoc-members:
:show-inheritance:


Module contents
---------------

.. automodule:: fastNLP
:members:
:undoc-members:
:show-inheritance:

docs/source/fastNLP.saver.rst  +6 -18

@@ -1,30 +1,18 @@
fastNLP.saver package
=====================
fastNLP.saver
==============

Submodules
----------

fastNLP.saver.logger module
---------------------------
fastNLP.saver.logger
---------------------

.. automodule:: fastNLP.saver.logger
:members:
:undoc-members:
:show-inheritance:

fastNLP.saver.model\_saver module
---------------------------------
fastNLP.saver.model\_saver
---------------------------

.. automodule:: fastNLP.saver.model_saver
:members:
:undoc-members:
:show-inheritance:


Module contents
---------------

.. automodule:: fastNLP.saver
:members:
:undoc-members:
:show-inheritance:

docs/source/figures/procedures_and_sequence_labeling.png  BIN

(binary image: 1079 × 558 px, 51 kB)

docs/source/figures/text_classification.png  BIN

(binary image: 1217 × 543 px, 54 kB)

docs/source/index.rst  +47 -9

@@ -1,16 +1,54 @@
.. fastNLP documentation master file, created by
sphinx-quickstart on Mon Aug 20 17:06:44 2018.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
fastNLP documentation
=====================
fastNLP is still incubating.

Welcome to fastNLP's documentation!
===================================

Introduction
------------

fastNLP is a modular Natural Language Processing system based on PyTorch, designed for fast development of NLP tools.
It divides deep-learning-based NLP models into different modules.
These modules fall into four categories: encoder, interaction, aggregation, and decoder,
and each category contains several implemented modules.

Most current NLP models can be built from these modules, which greatly simplifies the process of developing NLP models.
The architecture of fastNLP is shown on the left of the figure below:

.. image:: figures/procedures_and_sequence_labeling.png

For the constructing-model part, sequence labeling (right of the figure above) and text classification (figure below) serve as examples:

.. image:: figures/text_classification.png

* encoder module: encodes the input into an abstract representation; it takes a sequence of words and outputs a sequence of vectors.
* interaction module: makes the information in the representation interact with each other; it takes a sequence of vectors and outputs a sequence of vectors.
* aggregation module: aggregates and reduces information; it takes a sequence of vectors and outputs a single vector.
* decoder module: decodes the representation into the output, either a single label (text classification) or a label sequence (sequence labeling).

The interaction and aggregation modules are not necessarily present in a model; the sequence labeling model above, for example, contains neither.




User's Guide
------------
.. toctree::
:maxdepth: 2

user/installation
user/quickstart


API Reference
-------------

If you are looking for information on a specific function, class or
method, this part of the documentation is for you.

.. toctree::
:maxdepth: 4
:caption: Contents:
:maxdepth: 2
fastNLP
fastNLP API <fastNLP>





docs/source/modules.rst  +0 -7

@@ -1,7 +0,0 @@
fastNLP
=======

.. toctree::
:maxdepth: 4

fastNLP

docs/source/user/installation.rst  +31 -0

@@ -0,0 +1,31 @@
============
Installation
============

.. contents::
:local:


Cloning From GitHub
~~~~~~~~~~~~~~~~~~~

If you just want to use fastNLP, use:

.. code:: shell

git clone https://github.com/fastnlp/fastNLP
cd fastNLP

PyTorch Installation
~~~~~~~~~~~~~~~~~~~~

Visit the [PyTorch official website] for installation instructions based
on your system. In general, you could use:

.. code:: shell

# using conda
conda install pytorch torchvision -c pytorch
# or using pip
pip3 install torch torchvision

docs/source/user/quickstart.rst  +84 -0

@@ -0,0 +1,84 @@
==========
Quickstart
==========

Example
-------

Basic Usage
~~~~~~~~~~~

A typical fastNLP routine is composed of four phases: loading dataset,
pre-processing data, constructing model and training model.

.. code:: python

from fastNLP.models.base_model import BaseModel
from fastNLP.modules import encoder
from fastNLP.modules import aggregation
from fastNLP.modules import decoder

from fastNLP.loader.dataset_loader import ClassDatasetLoader
from fastNLP.loader.preprocess import ClassPreprocess
from fastNLP.core.trainer import ClassificationTrainer
from fastNLP.core.inference import ClassificationInfer


class ClassificationModel(BaseModel):
"""
Simple text classification model based on CNN.
"""

def __init__(self, num_classes, vocab_size):
super(ClassificationModel, self).__init__()

self.emb = encoder.Embedding(nums=vocab_size, dims=300)
self.enc = encoder.Conv(
in_channels=300, out_channels=100, kernel_size=3)
self.agg = aggregation.MaxPool()
self.dec = decoder.MLP(100, num_classes=num_classes)

def forward(self, x):
x = self.emb(x) # [N,L] -> [N,L,C]
x = self.enc(x) # [N,L,C_in] -> [N,L,C_out]
x = self.agg(x) # [N,L,C] -> [N,C]
x = self.dec(x) # [N,C] -> [N, N_class]
return x


data_dir = 'data' # directory to save data and model
train_path = 'test/data_for_tests/text_classify.txt' # training set file

# load dataset
ds_loader = ClassDatasetLoader("train", train_path)
data = ds_loader.load()

# pre-process dataset
pre = ClassPreprocess(data_dir)
vocab_size, n_classes = pre.process(data, "data_train.pkl")

# construct model
model_args = {
'num_classes': n_classes,
'vocab_size': vocab_size
}
model = ClassificationModel(num_classes=n_classes, vocab_size=vocab_size)

# train model
train_args = {
"epochs": 20,
"batch_size": 50,
"pickle_path": data_dir,
"validate": False,
"save_best_dev": False,
"model_saved_path": None,
"use_cuda": True,
"learn_rate": 1e-3,
"momentum": 0.9}
trainer = ClassificationTrainer(train_args)
trainer.train(model)

# predict using model
seqs = [x[0] for x in data]
infer = ClassificationInfer(data_dir)
labels_pred = infer.predict(model, seqs)

fastNLP/core/action.py  +57 -31

@@ -1,7 +1,3 @@
"""
This file defines Action(s) and sample methods.

"""
from collections import Counter

import numpy as np
@@ -9,13 +5,12 @@ import torch


class Action(object):
"""
Operations shared by Trainer, Tester, or Inference.
"""Operations shared by Trainer, Tester, or Inference.
This is designed for reducing replicate codes.
- make_batch: produce a mini-batch of data. @staticmethod
- pad: padding method used in sequence modeling. @staticmethod
- mode: change network mode for either train or test. (for PyTorch) @staticmethod
The base Action shall define operations shared by as much task-specific Actions as possible.
"""

def __init__(self):
@@ -24,18 +19,20 @@ class Action(object):
@staticmethod
def make_batch(iterator, use_cuda, output_length=True, max_len=None):
"""Batch and Pad data.

:param iterator: an iterator, (object that implements __next__ method) which returns the next sample.
:param use_cuda: bool, whether to use GPU
:param output_length: bool, whether to output the original length of the sequence before padding. (default: True)
:param max_len: int, maximum sequence length. Longer sequences will be clipped. (default: None)
:return
if output_length is True:
:return :

if output_length is True,
(batch_x, seq_len): tuple of two elements
batch_x: list. Each entry is a list of features of a sample. [batch_size, max_len]
seq_len: list. The length of the pre-padded sequence, if output_length is True.
batch_y: list. Each entry is a list of labels of a sample. [batch_size, num_labels]

if output_length is False:
if output_length is False,
batch_x: list. Each entry is a list of features of a sample. [batch_size, max_len]
batch_y: list. Each entry is a list of labels of a sample. [batch_size, num_labels]
"""
@@ -77,21 +74,21 @@ class Action(object):
return batch

@staticmethod
def mode(model, test=False):
"""
Train mode or Test mode. This is for PyTorch currently.
:param model:
:param test:
def mode(model, is_test=False):
"""Train mode or Test mode. This is for PyTorch currently.
:param model: a PyTorch model
:param is_test: bool, whether in test mode or not.
"""
if test:
if is_test:
model.eval()
else:
model.train()


def convert_to_torch_tensor(data_list, use_cuda):
"""
convert lists into (cuda) Tensors.
"""Convert lists into (cuda) Tensors.
:param data_list: 2-level lists
:param use_cuda: bool, whether to use GPU or not
:return data_list: PyTorch Tensor of shape [batch_size, max_seq_len]
@@ -103,8 +100,8 @@ def convert_to_torch_tensor(data_list, use_cuda):


def k_means_1d(x, k, max_iter=100):
"""
Perform k-means on 1-D data.
"""Perform k-means on 1-D data.
:param x: list of int, representing points in 1-D.
:param k: the number of clusters required.
:param max_iter: maximum iteration
@@ -132,21 +129,28 @@ def k_means_1d(x, k, max_iter=100):


def k_means_bucketing(all_inst, buckets):
"""
"""Assign all instances into possible buckets using k-means, such that instances in the same bucket have similar lengths.

:param all_inst: 3-level list
E.g. ::

[
[[word_11, word_12, word_13], [label_11. label_12]], # sample 1
[[word_21, word_22, word_23], [label_21. label_22]], # sample 2
...
]

:param buckets: list of int. The length of the list is the number of buckets. Each integer is the maximum length
threshold for each bucket (This is usually None.).
:return data: 2-level list
::

[
[index_11, index_12, ...], # bucket 1
[index_21, index_22, ...], # bucket 2
...
]

"""
bucket_data = [[] for _ in buckets]
num_buckets = len(buckets)
@@ -160,11 +164,16 @@ def k_means_bucketing(all_inst, buckets):


class BaseSampler(object):
"""
Base class for all samplers.
"""The base class of all samplers.
"""

def __init__(self, data_set):
"""

:param data_set: multi-level list, of shape [num_example, *]

"""
self.data_set_length = len(data_set)
self.data = data_set

@@ -176,11 +185,16 @@ class BaseSampler(object):


class SequentialSampler(BaseSampler):
"""
Sample data in the original order.
"""Sample data in the original order.
"""

def __init__(self, data_set):
"""

:param data_set: multi-level list

"""
super(SequentialSampler, self).__init__(data_set)

def __iter__(self):
@@ -188,11 +202,16 @@ class SequentialSampler(BaseSampler):


class RandomSampler(BaseSampler):
"""
Sample data in random permutation order.
"""Sample data in random permutation order.
"""

def __init__(self, data_set):
"""

:param data_set: multi-level list

"""
super(RandomSampler, self).__init__(data_set)
self.order = np.random.permutation(self.data_set_length)

@@ -201,11 +220,18 @@ class RandomSampler(BaseSampler):


class Batchifier(object):
"""
Wrap random or sequential sampler to generate a mini-batch.
"""Wrap random or sequential sampler to generate a mini-batch.
"""

def __init__(self, sampler, batch_size, drop_last=True):
"""

:param sampler: a Sampler object
:param batch_size: int, the size of the mini-batch
:param drop_last: bool, whether to drop the last examples that are not enough to make a mini-batch.

"""
super(Batchifier, self).__init__()
self.sampler = sampler
self.batch_size = batch_size
@@ -223,8 +249,7 @@ class Batchifier(object):


class BucketBatchifier(Batchifier):
"""
Partition all samples into multiple buckets, each of which contains sentences of approximately the same length.
"""Partition all samples into multiple buckets, each of which contains sentences of approximately the same length.
In sampling, first random choose a bucket. Then sample data from it.
The number of buckets is decided dynamically by the variance of sentence lengths.
"""
@@ -237,6 +262,7 @@ class BucketBatchifier(Batchifier):
:param num_buckets: int, number of buckets for grouping these sequences.
:param drop_last: bool, useless currently.
:param sampler: Sampler, useless currently.

"""
super(BucketBatchifier, self).__init__(sampler, batch_size, drop_last)
buckets = ([None] * num_buckets)
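
For reference, a minimal sketch of how the sampler, batchifier, and `Action.make_batch` shown above compose. The data is a toy assumption, and the yield shape of `make_batch` is inferred from its docstring and the Tester usage further down, so treat it as a sketch rather than a definitive recipe:

```python
# Hypothetical usage of the batching utilities above; not part of this commit.
from fastNLP.core.action import Action, Batchifier, RandomSampler

data_set = [
    [[1, 2, 3], [0, 1, 0]],  # sample: (word indices, label indices)
    [[4, 5], [1, 0]],
    [[6], [1]],
]

# RandomSampler permutes the samples; Batchifier groups them into mini-batches.
iterator = iter(Batchifier(RandomSampler(data_set), batch_size=2, drop_last=False))

# make_batch pads each mini-batch; with output_length=True, batch_x is assumed
# to be a (padded_data, seq_len) pair, as in SeqLabelTester.make_batch below.
for batch_x, batch_y in Action.make_batch(iterator, use_cuda=False, output_length=True):
    padded, seq_len = batch_x
    print(padded, seq_len, batch_y)
```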


fastNLP/core/loss.py  +15 -1

@@ -8,8 +8,13 @@ class Loss(object):
"""

def __init__(self, args):
"""

:param args: None or str, the name of a loss function.

"""
if args is None:
# this is useful when
# this is useful when Trainer.__init__ performs type check
self._loss = None
elif isinstance(args, str):
self._loss = self._borrow_from_pytorch(args)
@@ -17,10 +22,19 @@ class Loss(object):
raise NotImplementedError

def get(self):
"""

:return self._loss: the loss function
"""
return self._loss

@staticmethod
def _borrow_from_pytorch(loss_name):
"""Given a name of a loss function, return it from PyTorch.

:param loss_name: str, the name of a loss function
:return loss: a PyTorch loss
"""
if loss_name == "cross_entropy":
return torch.nn.CrossEntropyLoss()
else:
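
A small usage sketch of the `Loss` wrapper above; the names come directly from this diff:

```python
# Hypothetical usage; "cross_entropy" is the only name handled in this hunk.
from fastNLP.core.loss import Loss

loss_func = Loss("cross_entropy").get()  # a torch.nn.CrossEntropyLoss instance
no_loss = Loss(None).get()               # None, used to pass Trainer's type check
```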


fastNLP/core/metrics.py  +26 -31

@@ -1,11 +1,12 @@
import warnings

import numpy as np
import torch


def _conver_numpy(x):
"""
convert input data to numpy array
"""convert input data to numpy array
"""
if isinstance(x, np.ndarray):
return x
@@ -17,21 +18,20 @@ def _conver_numpy(x):


def _check_same_len(*arrays, axis=0):
"""
check if input array list has same length for one dimension
"""check if input array list has same length for one dimension
"""
lens = set([x.shape[axis] for x in arrays if x is not None])
return len(lens) == 1


def _label_types(y):
"""
determine the type
"binary"
"multiclass"
"multiclass-multioutput"
"multilabel"
"unknown"
"""Determine the type
- "binary"
- "multiclass"
- "multiclass-multioutput"
- "multilabel"
- "unknown"
"""
# never squeeze the first dimension
y = y.squeeze() if y.shape[0] > 1 else y.resize(1, -1)
@@ -46,8 +46,8 @@ def _label_types(y):


def _check_data(y_true, y_pred):
"""
check if y_true and y_pred is same type of data e.g both binary or multiclass
"""Check if y_true and y_pred is same type of data e.g both binary or multiclass
"""
y_true, y_pred = _conver_numpy(y_true), _conver_numpy(y_pred)
if not _check_same_len(y_true, y_pred):
@@ -174,16 +174,13 @@ def classification_report(y_true, y_pred, labels=None, target_names=None, digits


def accuracy_topk(y_true, y_prob, k=1):
"""
Compute accuracy of y_true matching top-k probable
"""Compute accuracy of y_true matching top-k probable
labels in y_prob.

Paras:
y_ture - ndarray, true label, [n_samples]
y_prob - ndarray, label probabilities, [n_samples, n_classes]
k - int, k in top-k
Returns:
accuracy of top-k
:param y_true: ndarray, true label, [n_samples]
:param y_prob: ndarray, label probabilities, [n_samples, n_classes]
:param k: int, k in top-k
:return: accuracy of top-k
"""

y_pred_topk = np.argsort(y_prob, axis=-1)[:, -1:-k - 1:-1]
@@ -195,16 +192,14 @@ def accuracy_topk(y_true, y_prob, k=1):


def pred_topk(y_prob, k=1):
"""
Return top-k predicted labels and corresponding probabilities.

Args:
y_prob - ndarray, size [n_samples, n_classes], probabilities on labels
k - int, k of top-k
Returns:
y_pred_topk - ndarray, size [n_samples, k], predicted top-k labels
y_prob_topk - ndarray, size [n_samples, k], probabilities for
top-k labels
"""Return top-k predicted labels and corresponding probabilities.


:param y_prob: ndarray, size [n_samples, n_classes], probabilities on labels
:param k: int, k of top-k
:returns
y_pred_topk: ndarray, size [n_samples, k], predicted top-k labels
y_prob_topk: ndarray, size [n_samples, k], probabilities for top-k labels
"""

y_pred_topk = np.argsort(y_prob, axis=-1)[:, -1:-k - 1:-1]
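
To make the top-k logic above concrete, here is a self-contained numpy rerun of the same `argsort` slicing on toy data; the membership test mirrors what `accuracy_topk` is expected to compute:

```python
import numpy as np

y_true = np.array([0, 2, 1])
y_prob = np.array([[0.7, 0.2, 0.1],
                   [0.1, 0.3, 0.6],
                   [0.2, 0.5, 0.3]])
k = 2

# Same slicing as above: sort ascending, then take the last k columns reversed.
y_pred_topk = np.argsort(y_prob, axis=-1)[:, -1:-k - 1:-1]  # [[0, 1], [2, 1], [1, 2]]

# A hit whenever the true label appears among the top-k predicted labels.
hits = np.any(y_pred_topk == y_true.reshape(-1, 1), axis=-1)
print(hits.mean())  # 1.0 on this toy example
```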


fastNLP/core/optimizer.py  +10 -2

@@ -4,7 +4,6 @@ import torch
class Optimizer(object):
"""Wrapper of optimizer from framework

names: arguments (type)
1. Adam: lr (float), weight_decay (float)
2. AdaGrad
3. RMSProp
@@ -16,20 +15,29 @@ class Optimizer(object):
"""
:param optimizer_name: str, the name of the optimizer
:param kwargs: the arguments

"""
self.optim_name = optimizer_name
self.kwargs = kwargs

@property
def name(self):
"""The name of the optimizer.

:return: str
"""
return self.optim_name

@property
def params(self):
"""The arguments used to create the optimizer.

:return: dict of (str, *)
"""
return self.kwargs

def construct_from_pytorch(self, model_params):
"""construct a optimizer from framework over given model parameters"""
"""Construct a optimizer from framework over given model parameters."""

if self.optim_name in ["SGD", "sgd"]:
if "lr" in self.kwargs:
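
A short sketch of the `Optimizer` wrapper above; the argument names follow the docstring in this diff, and the model is a placeholder:

```python
# Hypothetical usage; construct_from_pytorch returns a real torch.optim object.
import torch
from fastNLP.core.optimizer import Optimizer

model = torch.nn.Linear(10, 2)
optim_proto = Optimizer("SGD", lr=0.01, momentum=0.9)
print(optim_proto.name)    # "SGD"
print(optim_proto.params)  # {"lr": 0.01, "momentum": 0.9}

sgd = optim_proto.construct_from_pytorch(model.parameters())
```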


fastNLP/core/predictor.py  +1 -1

@@ -70,7 +70,7 @@ class Predictor(object):
def predict(self, network, data):
"""Perform inference using the trained model.

:param network: a PyTorch model
:param network: a PyTorch model (cpu)
:param data: list of list of strings
:return: list of list of strings, [num_examples, tag_seq_length]
"""


fastNLP/core/preprocess.py  +58 -13

@@ -17,20 +17,33 @@ DEFAULT_WORD_TO_INDEX = {DEFAULT_PADDING_LABEL: 0, DEFAULT_UNKNOWN_LABEL: 1,
# the first vocab in dict with the index = 5

def save_pickle(obj, pickle_path, file_name):
"""Save an object into a pickle file.

:param obj: an object
:param pickle_path: str, the directory where the pickle file is to be saved
:param file_name: str, the name of the pickle file. In general, it should end with "pkl".
"""
with open(os.path.join(pickle_path, file_name), "wb") as f:
_pickle.dump(obj, f)
print("{} saved. ".format(file_name))
print("{} saved in {}".format(file_name, pickle_path))


def load_pickle(pickle_path, file_name):
"""Load an object from a given pickle file.

:param pickle_path: str, the directory where the pickle file is.
:param file_name: str, the name of the pickle file.
:return obj: an object stored in the pickle
"""
with open(os.path.join(pickle_path, file_name), "rb") as f:
obj = _pickle.load(f)
print("{} loaded. ".format(file_name))
print("{} loaded from {}".format(file_name, pickle_path))
return obj


def pickle_exist(pickle_path, pickle_name):
"""
"""Check if a given pickle file exists in the directory.

:param pickle_path: the directory of target pickle file
:param pickle_name: the filename of target pickle file
:return: True if file exists else False
@@ -45,6 +58,19 @@ def pickle_exist(pickle_path, pickle_name):


class BasePreprocess(object):
"""Base class of all preprocessors.
Preprocessors are responsible for converting data of strings into data of indices.
During the pre-processing, the following pickle files will be built:

- "word2id.pkl", a mapping from words(tokens) to indices
- "id2word.pkl", a reversed dictionary
- "label2id.pkl", a dictionary on labels
- "id2label.pkl", a reversed dictionary on labels

These four pickle files are expected to be saved in the given pickle directory once they are constructed.
Preprocessors will check if those files are already in the directory and will reuse them in future calls.
"""

def __init__(self):
self.word2index = None
self.label2index = None
@@ -68,6 +94,7 @@ class BasePreprocess(object):
:param n_fold: int, the number of folds of cross validation. Only useful when cross_val is True.
:return results: a tuple of datasets after preprocessing.
"""

if pickle_exist(pickle_path, "word2id.pkl") and pickle_exist(pickle_path, "class2id.pkl"):
self.word2index = load_pickle(pickle_path, "word2id.pkl")
self.label2index = load_pickle(pickle_path, "class2id.pkl")
@@ -98,6 +125,8 @@ class BasePreprocess(object):
save_pickle(data_train, pickle_path, "data_train.pkl")
else:
data_train = load_pickle(pickle_path, "data_train.pkl")
if pickle_exist(pickle_path, "data_dev.pkl"):
data_dev = load_pickle(pickle_path, "data_dev.pkl")
else:
# cross_val is True
if not pickle_exist(pickle_path, "data_train_0.pkl"):
@@ -181,25 +210,31 @@ class SeqLabelPreprocess(BasePreprocess):
"""Preprocess pipeline, including building mapping from words to index, from index to words,
from labels/classes to index, from index to labels/classes.
data of three-level list which have multiple labels in each sample.
::

[
[ [word_11, word_12, ...], [label_1, label_1, ...] ],
[ [word_21, word_22, ...], [label_2, label_1, ...] ],
...
]

"""

def __init__(self):
super(SeqLabelPreprocess, self).__init__()

def build_dict(self, data):
"""
Add new words with indices into self.word_dict, new labels with indices into self.label_dict.
"""Add new words with indices into self.word_dict, new labels with indices into self.label_dict.
:param data: three-level list
::

[
[ [word_11, word_12, ...], [label_1, label_1, ...] ],
[ [word_21, word_22, ...], [label_2, label_1, ...] ],
...
]

:return word2index: dict of {str, int}
label2index: dict of {str, int}
"""
@@ -215,14 +250,17 @@ class SeqLabelPreprocess(BasePreprocess):
return word2index, label2index

def to_index(self, data):
"""
Convert word strings and label strings into indices.
"""Convert word strings and label strings into indices.
:param data: three-level list
::

[
[ [word_11, word_12, ...], [label_1, label_1, ...] ],
[ [word_21, word_22, ...], [label_2, label_1, ...] ],
...
]

:return data_index: the same shape as data, but each string is replaced by its corresponding index
"""
data_index = []
@@ -241,11 +279,14 @@ class ClassPreprocess(BasePreprocess):
Preprocess pipeline, including building mapping from words to index, from index to words,
from labels/classes to index, from index to labels/classes.
design for data of three-level list which has a single label in each sample.
::

[
[ [word_11, word_12, ...], label_1 ],
[ [word_21, word_22, ...], label_2 ],
...
]

"""

def __init__(self):
@@ -268,18 +309,21 @@ class ClassPreprocess(BasePreprocess):

for word in sent:
if word not in word2index:
word2index[word[0]] = len(word2index)
word2index[word] = len(word2index)
return word2index, label2index

def to_index(self, data):
"""
Convert word strings and label strings into indices.
"""Convert word strings and label strings into indices.
:param data: three-level list
::

[
[ [word_11, word_12, ...], label_1 ],
[ [word_21, word_22, ...], label_2 ],
...
]

:return data_index: the same shape as data, but each string is replaced by its corresponding index
"""
data_index = []
@@ -294,14 +338,15 @@ class ClassPreprocess(BasePreprocess):


def infer_preprocess(pickle_path, data):
"""
Preprocess over inference data.
Transform three-level list of strings into that of index.
"""Preprocess over inference data. Transform three-level list of strings into that of index.
::
[
[word_11, word_12, ...],
[word_21, word_22, ...],
...
]

"""
word2index = load_pickle(pickle_path, "word2id.pkl")
data_index = []
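
A minimal sketch of the pickle helpers documented above; the path and data below are toy assumptions:

```python
import os
from fastNLP.core.preprocess import load_pickle, save_pickle

os.makedirs("./save", exist_ok=True)
data = [
    [["we", "like", "it"], ["O", "O", "O"]],  # three-level list: words, labels
]

save_pickle(data, "./save/", "data_train.pkl")       # prints: data_train.pkl saved in ./save/
restored = load_pickle("./save/", "data_train.pkl")  # prints: data_train.pkl loaded from ./save/
assert restored == data
```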


fastNLP/core/tester.py  +42 -22

@@ -38,7 +38,7 @@ class BaseTester(object):
Obviously, "required_args" is the subset of "default_args".
The value in "default_args" to the keys in "required_args" is simply for type check.
"""
# TODO: required arguments
# add required arguments here
required_args = {}

for req_key in required_args:
@@ -56,7 +56,7 @@ class BaseTester(object):
logger.error(msg)
raise ValueError(msg)
else:
# BeseTester doesn't care about extra arguments
# BaseTester doesn't care about extra arguments
pass
print(default_args)

@@ -69,8 +69,8 @@ class BaseTester(object):
self.print_every_step = default_args["print_every_step"]

self._model = None
self.eval_history = []
self.batch_output = []
self.eval_history = [] # evaluation results of all batches
self.batch_output = [] # outputs of all batches

def test(self, network, dev_data):
if torch.cuda.is_available() and self.use_cuda:
@@ -83,10 +83,10 @@ class BaseTester(object):
self.eval_history.clear()
self.batch_output.clear()

iterator = iter(Batchifier(RandomSampler(dev_data), self.batch_size, drop_last=True))
iterator = iter(Batchifier(RandomSampler(dev_data), self.batch_size, drop_last=False))
step = 0

for batch_x, batch_y in self.make_batch(iterator, dev_data):
for batch_x, batch_y in self.make_batch(iterator):
with torch.no_grad():
prediction = self.data_forward(network, batch_x)
eval_results = self.evaluate(prediction, batch_y)
@@ -98,8 +98,8 @@ class BaseTester(object):

print_output = "[test step {}] {}".format(step, eval_results)
logger.info(print_output)
if step % self.print_every_step == 0:
print(print_output)
if self.print_every_step > 0 and step % self.print_every_step == 0:
print(self.make_eval_output(prediction, eval_results))
step += 1

def mode(self, model, test):
@@ -115,28 +115,48 @@ class BaseTester(object):
raise NotImplementedError

def evaluate(self, predict, truth):
"""Compute evaluation metrics for the model. """
"""Compute evaluation metrics.

:param predict: Tensor
:param truth: Tensor
:return eval_results: can be anything. It will be stored in self.eval_history
"""
raise NotImplementedError

@property
def metrics(self):
"""Return a list of metrics. """
"""Compute and return metrics.
Use self.eval_history to compute metrics over the whole dev set.
Please refer to metrics.py for common metric functions.

:return : variable number of outputs
"""
raise NotImplementedError

def show_matrices(self):
"""This is called by Trainer to print evaluation results on dev set during training.
def show_metrics(self):
"""Customize evaluation outputs in Trainer.
Called by Trainer to print evaluation results on dev set during training.
Use self.metrics to fetch available metrics.

:return print_str: str
"""
raise NotImplementedError

def make_batch(self, iterator, data):
def make_batch(self, iterator):
raise NotImplementedError

def make_eval_output(self, predictions, eval_results):
"""Customize Tester outputs.

:param predictions: Tensor
:param eval_results: Tensor
:return: str, to be printed.
"""
raise NotImplementedError

class SeqLabelTester(BaseTester):
"""
Tester for sequence labeling.
"""Tester for sequence labeling.
"""

def __init__(self, **test_args):
@@ -187,22 +207,22 @@ class SeqLabelTester(BaseTester):
# make sure "results" is in the same device as "truth"
results = results.to(truth)
accuracy = torch.sum(results == truth.view((-1,))).to(torch.float) / results.shape[0]
return [loss.data, accuracy.data]
return [float(loss), float(accuracy)]

def metrics(self):
batch_loss = np.mean([x[0] for x in self.eval_history])
batch_accuracy = np.mean([x[1] for x in self.eval_history])
return batch_loss, batch_accuracy

def show_matrices(self):
"""
This is called by Trainer to print evaluation on dev set.
def show_metrics(self):
"""This is called by Trainer to print evaluation on dev set.
:return print_str: str
"""
loss, accuracy = self.metrics()
return "dev loss={:.2f}, accuracy={:.2f}".format(loss, accuracy)

def make_batch(self, iterator, data):
def make_batch(self, iterator):
return Action.make_batch(iterator, use_cuda=self.use_cuda, output_length=True)


@@ -211,12 +231,12 @@ class ClassificationTester(BaseTester):

def __init__(self, **test_args):
"""
:param test_args: a dict-like object that has __getitem__ method, \
:param test_args: a dict-like object that has __getitem__ method.
can be accessed by "test_args["key_str"]"
"""
super(ClassificationTester, self).__init__(**test_args)

def make_batch(self, iterator, data, max_len=None):
def make_batch(self, iterator, max_len=None):
return Action.make_batch(iterator, use_cuda=self.use_cuda, max_len=max_len)

def data_forward(self, network, x):
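
A hedged sketch of driving `SeqLabelTester` directly; the argument names are taken from `default_valid_args` in the Trainer diff below, and the model and dev data are placeholders:

```python
from fastNLP.core.tester import SeqLabelTester

test_args = {"save_output": True, "validate_in_training": True,
             "save_dev_input": True, "save_loss": True,
             "batch_size": 8, "pickle_path": "./save/",
             "use_cuda": False, "print_every_step": 10}
tester = SeqLabelTester(**test_args)

# model: a trained PyTorch sequence-labeling model; dev_data: indexed dev set.
# tester.test(model, dev_data)
# print(tester.show_metrics())  # e.g. "dev loss=1.23, accuracy=0.90"
```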


fastNLP/core/trainer.py  +61 -99

@@ -1,11 +1,11 @@
import _pickle
import copy
import os
import time
from datetime import timedelta

import numpy as np
import torch
import tensorboardX
from tensorboardX import SummaryWriter

from fastNLP.core.action import Action
from fastNLP.core.action import RandomSampler, Batchifier
@@ -16,16 +16,12 @@ from fastNLP.modules import utils
from fastNLP.saver.logger import create_logger
from fastNLP.saver.model_saver import ModelSaver

DEFAULT_QUEUE_SIZE = 300
logger = create_logger(__name__, "./train_test.log")


class BaseTrainer(object):
"""Operations to train a model, including data loading, SGD, and validation.
"""Operations of training a model, including data loading, gradient descent, and validation.

Subclasses must implement the following abstract methods:
- grad_backward
- get_loss
"""

def __init__(self, **kwargs):
@@ -33,10 +29,10 @@ class BaseTrainer(object):
:param kwargs: dict of (key, value), or dict-like object. key is str.

The base trainer requires the following keys:
- epochs: int, the number of epochs in training
- validate: bool, whether or not to validate on dev set
- batch_size: int
- pickle_path: str, the path to pickle files for pre-processing
- epochs: int, the number of epochs in training
- validate: bool, whether or not to validate on dev set
- batch_size: int
- pickle_path: str, the path to pickle files for pre-processing
"""
super(BaseTrainer, self).__init__()

@@ -47,8 +43,8 @@ class BaseTrainer(object):
Otherwise, error will raise.
"""
default_args = {"epochs": 3, "batch_size": 8, "validate": True, "use_cuda": True, "pickle_path": "./save/",
"save_best_dev": True, "model_name": "default_model_name.pkl",
"loss": Loss(None),
"save_best_dev": True, "model_name": "default_model_name.pkl", "print_every_step": 1,
"loss": Loss(None), # used to pass type check
"optimizer": Optimizer("Adam", lr=0.001, weight_decay=0)
}
"""
@@ -57,7 +53,7 @@ class BaseTrainer(object):
Obviously, "required_args" is the subset of "default_args".
The value in "default_args" to the keys in "required_args" is simply for type check.
"""
# TODO: required arguments
# add required arguments here
required_args = {}

for req_key in required_args:
@@ -86,55 +82,46 @@ class BaseTrainer(object):
self.save_best_dev = default_args["save_best_dev"]
self.use_cuda = default_args["use_cuda"]
self.model_name = default_args["model_name"]
self.print_every_step = default_args["print_every_step"]

self._model = None
self._loss_func = default_args["loss"].get() # return a pytorch loss function or None
self._optimizer = None
self._optimizer_proto = default_args["optimizer"]
self._summary_writer = SummaryWriter(self.pickle_path + 'tensorboard_logs')
self._graph_summaried = False

def train(self, network, train_data, dev_data=None):
"""General Training Steps
"""General Training Procedure

:param network: a model
:param train_data: three-level list, the training set.
:param dev_data: three-level list, the validation data (optional)

The method is framework independent.
Work by calling the following methods:
- prepare_input
- mode
- define_optimizer
- data_forward
- get_loss
- grad_backward
- update
Subclasses must implement these methods with a specific framework.
"""
# prepare model and data, transfer model to gpu if available
# transfer model to gpu if available
if torch.cuda.is_available() and self.use_cuda:
self._model = network.cuda()
# self._model is used to access model-specific loss
else:
self._model = network

# define tester over dev data
# define Tester over dev data
if self.validate:
default_valid_args = {"save_output": True, "validate_in_training": True, "save_dev_input": True,
"save_loss": True, "batch_size": self.batch_size, "pickle_path": self.pickle_path,
"use_cuda": self.use_cuda}
"use_cuda": self.use_cuda, "print_every_step": 0}
validator = self._create_validator(default_valid_args)
logger.info("validator defined as {}".format(str(validator)))

# optimizer and loss
self.define_optimizer()
logger.info("optimizer defined as {}".format(str(self._optimizer)))
self.define_loss()
logger.info("loss function defined as {}".format(str(self._loss_func)))

# main training epochs
n_samples = len(train_data)
n_batches = n_samples // self.batch_size
n_print = 1
# main training procedure
start = time.time()
logger.info("training epochs started")

for epoch in range(1, self.n_epochs + 1):
logger.info("training epoch {}".format(epoch))

@@ -144,23 +131,31 @@ class BaseTrainer(object):
data_iterator = iter(Batchifier(RandomSampler(train_data), self.batch_size, drop_last=False))
logger.info("prepared data iterator")

self._train_step(data_iterator, network, start=start, n_print=n_print, epoch=epoch)
# one forward and backward pass
self._train_step(data_iterator, network, start=start, n_print=self.print_every_step, epoch=epoch)

# validation
if self.validate:
logger.info("validation started")
validator.test(network, dev_data)

if self.save_best_dev and self.best_eval_result(validator):
self.save_model(network, self.model_name)
print("saved better model selected by dev")
logger.info("saved better model selected by dev")
print("Saved better model selected by validation.")
logger.info("Saved better model selected by validation.")

valid_results = validator.show_matrices()
valid_results = validator.show_metrics()
print("[epoch {}] {}".format(epoch, valid_results))
logger.info("[epoch {}] {}".format(epoch, valid_results))

def _train_step(self, data_iterator, network, **kwargs):
"""Training process in one epoch."""
"""Training process in one epoch.

kwargs should contain:
- n_print: int, print training information every n steps.
- start: time.time(), the starting time of this step.
- epoch: int,
"""
step = 0
for batch_x, batch_y in self.make_batch(data_iterator):

@@ -169,8 +164,13 @@ class BaseTrainer(object):
loss = self.get_loss(prediction, batch_y)
self.grad_backward(loss)
self.update()
self._summary_writer.add_scalar("loss", loss.item(), global_step=step)

if step % kwargs["n_print"] == 0:
if not self._graph_summaried:
self._summary_writer.add_graph(network, batch_x)
self._graph_summaried = True

if kwargs["n_print"] > 0 and step % kwargs["n_print"] == 0:
end = time.time()
diff = timedelta(seconds=round(end - kwargs["start"]))
print_output = "[epoch: {:>3} step: {:>4}] train loss: {:>4.2} time: {}".format(
@@ -204,21 +204,6 @@ class BaseTrainer(object):
network_copy = copy.deepcopy(network)
self.train(network_copy, train_data_cv[i], dev_data_cv[i])

def load_train_data(self, pickle_path):
"""
For task-specific processing.
:param pickle_path:
:return data_train
"""
file_path = os.path.join(pickle_path, "data_train.pkl")
if os.path.exists(file_path):
with open(file_path, 'rb') as f:
data = _pickle.load(f)
else:
logger.error("cannot find training data {}. invalid input path for training data.".format(file_path))
raise RuntimeError("cannot find training data {}".format(file_path))
return data

def make_batch(self, iterator):
raise NotImplementedError

@@ -226,14 +211,13 @@ class BaseTrainer(object):
Action.mode(network, test)

def define_optimizer(self):
"""
Define framework-specific optimizer specified by the models.
"""Define framework-specific optimizer specified by the models.
"""
self._optimizer = self._optimizer_proto.construct_from_pytorch(self._model.parameters())

def update(self):
"""
Perform weight update on a model.
"""Perform weight update on a model.

For PyTorch, just call optimizer to update.
"""
@@ -243,8 +227,8 @@ class BaseTrainer(object):
raise NotImplementedError

def grad_backward(self, loss):
"""
Compute gradient with link rules.
"""Compute gradient with link rules.
:param loss: a scalar where back-prop starts

For PyTorch, just do "loss.backward()"
@@ -253,8 +237,8 @@ class BaseTrainer(object):
loss.backward()

def get_loss(self, predict, truth):
"""
Compute loss given prediction and ground truth.
"""Compute loss given prediction and ground truth.
:param predict: prediction label vector
:param truth: ground truth label vector
:return: a scalar
@@ -262,8 +246,9 @@ class BaseTrainer(object):
return self._loss_func(predict, truth)

def define_loss(self):
"""
if the model defines a loss, use model's loss.
"""Define a loss for the trainer.

If the model defines a loss, use model's loss.
Otherwise, Trainer must has a loss argument, use it as loss.
These two losses cannot be defined at the same time.
Trainer does not handle loss definition or choose default losses.
@@ -280,53 +265,30 @@ class BaseTrainer(object):
logger.info("The model didn't define loss, use Trainer's loss.")

def best_eval_result(self, validator):
"""
"""Check if the current epoch yields better validation results.

:param validator: a Tester instance
:return: bool, True means current results on dev set is the best.
"""
raise NotImplementedError

def save_model(self, network, model_name):
"""
"""Save this model with such a name.
This method may be called multiple times by Trainer to overwrite the saved model with a better one.

:param network: the PyTorch model
:param model_name: str
model_best_dev.pkl may be overwritten by a better model in future epochs.
"""
if model_name[-4:] != ".pkl":
model_name += ".pkl"
ModelSaver(self.pickle_path + model_name).save_pytorch(network)
ModelSaver(os.path.join(self.pickle_path, model_name)).save_pytorch(network)

def _create_validator(self, valid_args):
raise NotImplementedError


class ToyTrainer(BaseTrainer):
"""
An example to show the definition of Trainer.
"""

def __init__(self, training_args):
super(ToyTrainer, self).__init__(training_args)

def load_train_data(self, data_path):
data_train = _pickle.load(open(data_path + "/data_train.pkl", "rb"))
data_dev = _pickle.load(open(data_path + "/data_train.pkl", "rb"))
return data_train, data_dev, 0, 1

def data_forward(self, network, x):
return network(x)

def grad_backward(self, loss):
self._model.zero_grad()
loss.backward()

def get_loss(self, pred, truth):
return np.mean(np.square(pred - truth))


class SeqLabelTrainer(BaseTrainer):
"""
Trainer for Sequence Modeling
"""Trainer for Sequence Labeling

"""

@@ -356,11 +318,11 @@ class SeqLabelTrainer(BaseTrainer):
return y

def get_loss(self, predict, truth):
"""
Compute loss given prediction and ground truth.
"""Compute loss given prediction and ground truth.
:param predict: prediction label vector, [batch_size, max_len, tag_size]
:param truth: ground truth label vector, [batch_size, max_len]
:return: a scalar
:return loss: a scalar
"""
batch_size, max_len = predict.size(0), predict.size(1)
assert truth.shape == (batch_size, max_len)
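# illustrative shapes per the docstring: predict [B, L, T] (e.g. [32, 20, 10]) is scored against truth [B, L] to yield a scalar loss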
@@ -384,7 +346,7 @@ class SeqLabelTrainer(BaseTrainer):


class ClassificationTrainer(BaseTrainer):
"""Trainer for classification."""
"""Trainer for text classification."""

def __init__(self, **train_args):
super(ClassificationTrainer, self).__init__(**train_args)


+ 162 - 70  fastNLP/fastnlp.py

@@ -1,4 +1,7 @@
import os

from fastNLP.core.predictor import SeqLabelInfer, ClassificationInfer
from fastNLP.core.preprocess import load_pickle
from fastNLP.loader.config_loader import ConfigLoader, ConfigSection
from fastNLP.loader.model_loader import ModelLoader

@@ -7,14 +10,13 @@ mapping from model name to [URL, file_name.class_name, model_pickle_name]
Notice that the class of the model should be in "models" directory.

Example:
"zh_pos_tag_model": ["www.fudan.edu.cn", "sequence_modeling.SeqLabeling", "saved_model.pkl"]
"""
FastNLP_MODEL_COLLECTION = {
"seq_label_model": {
"url": "www.fudan.edu.cn",
"class": "sequence_modeling.SeqLabeling",
"class": "sequence_modeling.SeqLabeling", # file_name.class_name in models/
"pickle": "seq_label_model.pkl",
"type": "seq_label"
"type": "seq_label",
"config_file_name": "config", # the name of the config file which stores model initialization parameters
"config_section_name": "text_class_model" # the name of the section in the config file which stores model init params
},
"text_class_model": {
"url": "www.fudan.edu.cn",
@@ -22,11 +24,34 @@ FastNLP_MODEL_COLLECTION = {
"pickle": "text_class_model.pkl",
"type": "text_class"
}
"""
FastNLP_MODEL_COLLECTION = {
"cws_basic_model": {
"url": "",
"class": "sequence_modeling.AdvSeqLabel",
"pickle": "cws_basic_model_v_0.pkl",
"type": "seq_label",
"config_file_name": "config",
"config_section_name": "text_class_model"
},
"pos_tag_model": {
"url": "",
"class": "sequence_modeling.AdvSeqLabel",
"pickle": "pos_tag_model_v_0.pkl",
"type": "seq_label",
"config_file_name": "pos_tag.config",
"config_section_name": "pos_tag_model"
},
"text_classify_model": {
"url": "",
"class": "cnn_text_classification.CNNText",
"pickle": "text_class_model_v0.pkl",
"type": "text_class",
"config_file_name": "text_classify.cfg",
"config_section_name": "model"
}
}

CONFIG_FILE_NAME = "config"
SECTION_NAME = "text_class_model"


class FastNLP(object):
"""
@@ -51,10 +76,13 @@ class FastNLP(object):
self.model = None
self.infer_type = None # "seq_label"/"text_class"

def load(self, model_name):
def load(self, model_name, config_file="config", section_name="model"):
"""
Load a pre-trained FastNLP model together with additional data.
:param model_name: str, the name of a FastNLP model.
:param config_file: str, the name of the config file which stores the initialization information of the model.
(default: "config")
:param section_name: str, the name of the corresponding section in the config file. (default: "model")
"""
assert type(model_name) is str
if model_name not in FastNLP_MODEL_COLLECTION:
@@ -64,37 +92,47 @@ class FastNLP(object):
self._download(model_name, FastNLP_MODEL_COLLECTION[model_name]["url"])

model_class = self._get_model_class(FastNLP_MODEL_COLLECTION[model_name]["class"])
print("Restore model class {}".format(str(model_class)))

model_args = ConfigSection()
ConfigLoader.load_config(self.model_dir + CONFIG_FILE_NAME, {SECTION_NAME: model_args})
ConfigLoader.load_config(os.path.join(self.model_dir, config_file), {section_name: model_args})
print("Restore model hyper-parameters {}".format(str(model_args.data)))

# fetch dictionary size and number of labels from pickle files
word2index = load_pickle(self.model_dir, "word2id.pkl")
model_args["vocab_size"] = len(word2index)
index2label = load_pickle(self.model_dir, "id2class.pkl")
model_args["num_classes"] = len(index2label)

# Construct the model
model = model_class(model_args)
print("Model constructed.")

# TODO: make model loading framework-independent
ModelLoader.load_pytorch(model, self.model_dir + FastNLP_MODEL_COLLECTION[model_name]["pickle"])
ModelLoader.load_pytorch(model, os.path.join(self.model_dir, FastNLP_MODEL_COLLECTION[model_name]["pickle"]))
print("Model weights loaded.")

self.model = model
self.infer_type = FastNLP_MODEL_COLLECTION[model_name]["type"]

print("Model loaded. ")
print("Inference ready.")

def run(self, raw_input):
"""
Perform inference over given input using the loaded model.
:param raw_input: str, raw text
:param raw_input: list of string. Each string is an input query.
:return outputs: list, one prediction per input query.
"""

infer = self._create_inference(self.model_dir)

# string ---> 2-D list of string
infer_input = self.string_to_list(raw_input)
# tokenize: list of string ---> 2-D list of string
infer_input = self.tokenize(raw_input, language="zh")

# 2-D list of string ---> list of strings
# 2-D list of string ---> 2-D list of tags
results = infer.predict(self.model, infer_input)

# list of strings ---> final answers
# 2-D list of tags ---> list of final answers
outputs = self._make_output(results, infer_input)
return outputs
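A minimal end-to-end sketch of the load/run API above. The directory is hypothetical; it must already hold the pickles (word2id.pkl, id2class.pkl, the model .pkl) and the config file named in FastNLP_MODEL_COLLECTION:

from fastNLP.fastnlp import FastNLP

nlp = FastNLP(model_dir="/path/to/cws/save/")  # hypothetical directory of saved pickles
nlp.load("cws_basic_model", config_file="cws.cfg", section_name="POS_test")
results = nlp.run(["这是一个测试。"])
# for a "seq_label" model: one [(token, tag), ...] list per input query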

@@ -142,81 +180,135 @@ class FastNLP(object):
"""
return True

def string_to_list(self, text, delimiter="\n"):
"""
This function is used to transform raw input to lists, which is done by DatasetLoader in training.
Split text string into three-level lists.
[
[word_11, word_12, ...],
[word_21, word_22, ...],
...
]
:param text: string
:param delimiter: str, character used to split text into sentences.
:return data: two-level lists
def tokenize(self, text, language):
"""Extract tokens from strings.
For English, extract words separated by space.
For Chinese, extract characters.
TODO: more complex tokenization methods

:param text: list of string
:param language: str, one of ('zh', 'en'), Chinese or English.
:return data: list of list of string, each string is a token.
"""
assert language in ("zh", "en")
data = []
sents = text.strip().split(delimiter)
for sent in sents:
characters = []
for ch in sent:
characters.append(ch)
data.append(characters)
for sent in text:
if language == "en":
tokens = sent.strip().split()
elif language == "zh":
tokens = [char for char in sent]
else:
raise RuntimeError("Unknown language {}".format(language))
data.append(tokens)
return data
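A quick sketch of the two tokenization modes (`nlp` stands for any FastNLP instance):

nlp.tokenize(["this is fastNLP"], language="en")  # -> [["this", "is", "fastNLP"]]
nlp.tokenize(["深度学习"], language="zh")  # -> [["深", "度", "学", "习"]]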

def _make_output(self, results, infer_input):
"""Transform the infer output into user-friendly output.

:param results: 1 or 2-D list of strings.
If self.infer_type == "seq_label", it is of shape [num_examples, tag_seq_length]
If self.infer_type == "text_class", it is of shape [num_examples]
:param infer_input: 2-D list of string, the input query before inference.
:return outputs: list. Each entry is a prediction.
"""
if self.infer_type == "seq_label":
outputs = make_seq_label_output(results, infer_input)
elif self.infer_type == "text_class":
outputs = make_class_output(results, infer_input)
else:
raise ValueError("fail to make outputs with infer type {}".format(self.infer_type))
raise RuntimeError("fail to make outputs with infer type {}".format(self.infer_type))
return outputs


def make_seq_label_output(result, infer_input):
"""
Transform model output into user-friendly contents.
:param result: 1-D list of strings. (model output)
"""Transform model output into user-friendly contents.
:param result: 2-D list of strings. (model output)
:param infer_input: 2-D list of string (model input)
:return outputs:
:return ret: list of list of tuples
[
[(word_11, label_11), (word_12, label_12), ...],
[(word_21, label_21), (word_22, label_22), ...],
...
]
"""
return result

ret = []
for example_x, example_y in zip(infer_input, result):
ret.append([(x, y) for x, y in zip(example_x, example_y)])
return ret

def make_class_output(result, infer_input):
"""Transform model output into user-friendly contents.

:param result: list of strings, one predicted label per example. (model output)
:param infer_input: 2-D list of string. (model input)
:return ret: the same as result, [label_1, label_2, ...]
"""
return result


def interpret_word_seg_results(infer_input, results):
"""
Transform model output into user-friendly contents.
def interpret_word_seg_results(char_seq, label_seq):
"""Transform model output into user-friendly contents.
Example: In CWS, convert <BMES> labeling into segmented text.
:param results: list of strings. (model output)
:param infer_input: 2-D list of string (model input)
:return output: list of strings
:param char_seq: list of string.
:param label_seq: list of string, the same length as char_seq
Each entry is one of ('B', 'M', 'E', 'S').
:return output: list of words
"""
outputs = []
for sent_char, sent_label in zip(infer_input, results):
words = []
word = ""
for char, label in zip(sent_char, sent_label):
if label[0] == "B":
if word != "":
words.append(word)
word = char
elif label[0] == "M":
word += char
elif label[0] == "E":
word += char
words = []
word = ""
for char, label in zip(char_seq, label_seq):
if label[0] == "B":
if word != "":
words.append(word)
word = ""
elif label[0] == "S":
if word != "":
words.append(word)
word = ""
words.append(char)
else:
raise ValueError("invalid label")
outputs.append(" ".join(words))
word = char
elif label[0] == "M":
word += char
elif label[0] == "E":
word += char
words.append(word)
word = ""
elif label[0] == "S":
if word != "":
words.append(word)
word = ""
words.append(char)
else:
raise ValueError("invalid label {}".format(label[0]))
return words
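A worked example of the BMES decoding above, on toy input:

chars = ["这", "是", "中", "文", "分", "词"]
labels = ["S", "S", "B", "E", "B", "E"]
interpret_word_seg_results(chars, labels)  # -> ["这", "是", "中文", "分词"]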


def interpret_cws_pos_results(char_seq, label_seq):
"""Transform model output into user-friendly contents.

:param char_seq: list of string
:param label_seq: list of string, the same length as char_seq.
:return outputs: list of tuples (word, pos_tag).
"""

def pos_tag_check(seq):
"""check whether all entries are the same """
return len(set(seq)) <= 1

word = []
word_pos = []
outputs = []
for char, label in zip(char_seq, label_seq):
tmp = label.split("-")
cws_label, pos_tag = tmp[0], tmp[1]

if cws_label == "B" or cws_label == "M":
word.append(char)
word_pos.append(pos_tag)
elif cws_label == "E":
word.append(char)
word_pos.append(pos_tag)
if not pos_tag_check(word_pos):
raise RuntimeError("character-wise pos tags inconsistent. ")
outputs.append(("".join(word), word_pos[0]))
word.clear()
word_pos.clear()
elif cws_label == "S":
outputs.append((char, pos_tag))
return outputs
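A worked example of the joint CWS+POS decoding, on toy input whose labels follow the <cws_label>-<pos_tag> format the function splits on:

chars = ["江", "泽", "民", "说"]
labels = ["B-nr", "M-nr", "E-nr", "S-v"]
interpret_cws_pos_results(chars, labels)  # -> [("江泽民", "nr"), ("说", "v")]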

+ 3 - 4  fastNLP/loader/base_loader.py

@@ -1,9 +1,8 @@
class BaseLoader(object):
"""docstring for BaseLoader"""

def __init__(self, data_name, data_path):
def __init__(self, data_path):
super(BaseLoader, self).__init__()
self.data_name = data_name
self.data_path = data_path

def load(self):
@@ -25,8 +24,8 @@ class ToyLoader0(BaseLoader):
For charLM
"""

def __init__(self, name, path):
super(ToyLoader0, self).__init__(name, path)
def __init__(self, data_path):
super(ToyLoader0, self).__init__(data_path)

def load(self):
with open(self.data_path, 'r') as f:


+ 62 - 18  fastNLP/loader/dataset_loader.py

@@ -6,8 +6,8 @@ from fastNLP.loader.base_loader import BaseLoader
class DatasetLoader(BaseLoader):
""""loader for data sets"""

def __init__(self, data_name, data_path):
super(DatasetLoader, self).__init__(data_name, data_path)
def __init__(self, data_path):
super(DatasetLoader, self).__init__(data_path)


class POSDatasetLoader(DatasetLoader):
@@ -31,8 +31,8 @@ class POSDatasetLoader(DatasetLoader):
to label5.
"""

def __init__(self, data_name, data_path):
super(POSDatasetLoader, self).__init__(data_name, data_path)
def __init__(self, data_path):
super(POSDatasetLoader, self).__init__(data_path)

def load(self):
assert os.path.exists(self.data_path)
@@ -84,8 +84,8 @@ class TokenizeDatasetLoader(DatasetLoader):
Data set loader for tokenization data sets
"""

def __init__(self, data_name, data_path):
super(TokenizeDatasetLoader, self).__init__(data_name, data_path)
def __init__(self, data_path):
super(TokenizeDatasetLoader, self).__init__(data_path)

def load_pku(self, max_seq_len=32):
"""
@@ -138,8 +138,8 @@ class TokenizeDatasetLoader(DatasetLoader):
class ClassDatasetLoader(DatasetLoader):
"""Loader for classification data sets"""

def __init__(self, data_name, data_path):
super(ClassDatasetLoader, self).__init__(data_name, data_path)
def __init__(self, data_path):
super(ClassDatasetLoader, self).__init__(data_path)

def load(self):
assert os.path.exists(self.data_path)
@@ -177,7 +177,7 @@ class ConllLoader(DatasetLoader):
:param str data_name: the name of the conll data set
:param str data_path: the path to the conll data set
"""
super(ConllLoader, self).__init__(data_name, data_path)
super(ConllLoader, self).__init__(data_path)
self.data_set = self.parse(self.load())

def load(self):
@@ -209,8 +209,8 @@ class ConllLoader(DatasetLoader):


class LMDatasetLoader(DatasetLoader):
def __init__(self, data_name, data_path):
super(LMDatasetLoader, self).__init__(data_name, data_path)
def __init__(self, data_path):
super(LMDatasetLoader, self).__init__(data_path)

def load(self):
if not os.path.exists(self.data_path):
@@ -220,13 +220,57 @@ class LMDatasetLoader(DatasetLoader):
return text.strip().split()


if __name__ == "__main__":
class PeopleDailyCorpusLoader(DatasetLoader):
"""
data = POSDatasetLoader("xxx", "../../test/data_for_tests/people.txt").load_lines()
for example in data:
for w, l in zip(example[0], example[1]):
print(w, l)
People's Daily Corpus: Chinese word segmentation, POS tagging and NER annotations
"""

ans = TokenizeDatasetLoader("xxx", "/home/zyfeng/Desktop/data/icwb2-data/training/test").load_pku()
print(ans)
def __init__(self, data_path):
super(PeopleDailyCorpusLoader, self).__init__(data_path)

def load(self):
with open(self.data_path, "r", encoding="utf-8") as f:
sents = f.readlines()

pos_tag_examples = []
ner_examples = []
for sent in sents:
inside_ne = False
sent_pos_tag = []
sent_words = []
sent_ner = []
words = sent.strip().split()[1:]
for word in words:
if "[" in word and "]" in word:
ner_tag = "U"
print(word)
elif "[" in word:
inside_ne = True
ner_tag = "B"
word = word[1:]
elif "]" in word:
ner_tag = "L"
word = word[:word.index("]")]
if inside_ne is True:
inside_ne = False
else:
raise RuntimeError("only ] appears!")
else:
if inside_ne is True:
ner_tag = "I"
else:
ner_tag = "O"
tmp = word.split("/")
token, pos = tmp[0], tmp[1]
sent_ner.append(ner_tag)
sent_pos_tag.append(pos)
sent_words.append(token)
pos_tag_examples.append([sent_words, sent_pos_tag])
ner_examples.append([sent_words, sent_ner])
return pos_tag_examples, ner_examples
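For reference, a sketch of the bracketed named-entity convention this parser expects; the sample line is illustrative, not taken from the corpus, and the first token of each line is a document id that split()[1:] drops:

# input line:  19980101-01-001-001/m  迈向/v  [中共/j 中央/n]nt  总书记/n
# pos_tag_examples[0] -> [["迈向", "中共", "中央", "总书记"], ["v", "j", "n", "n"]]
# ner_examples[0]     -> [["迈向", "中共", "中央", "总书记"], ["O", "B", "L", "O"]]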

if __name__ == "__main__":
loader = PeopleDailyCorpusLoader("./")
pos, ner = loader.load()
print(pos[:10])
print(ner[:10])

+ 44 - 2  fastNLP/loader/embed_loader.py

@@ -1,8 +1,50 @@
import _pickle
import os

import numpy as np

from fastNLP.loader.base_loader import BaseLoader


class EmbedLoader(BaseLoader):
"""docstring for EmbedLoader"""

def __init__(self, data_name, data_path):
super(EmbedLoader, self).__init__(data_name, data_path)
def __init__(self, data_path):
super(EmbedLoader, self).__init__(data_path)

@staticmethod
def load_embedding(emb_dim, emb_file, word_dict, emb_pkl):
"""Load the pre-trained embedding and combine with the given dictionary.

:param emb_file: str, the pre-trained embedding.
The embedding file should have the following format:
Each line is a word embedding, where a word string is followed by multiple floats.
Floats are separated by space. The word and the first float are separated by space.
:param word_dict: dict, a mapping from word to index.
:param emb_dim: int, the dimension of the embedding. Should be the same as pre-trained embedding.
:param emb_pkl: str, the embedding pickle file.
:return embedding_np: numpy array of shape (len(word_dict), emb_dim)

TODO: fragile code
"""
# If the embedding pickle exists, load it and return.
if os.path.exists(emb_pkl):
with open(emb_pkl, "rb") as f:
embedding_np = _pickle.load(f)
return embedding_np
# Otherwise, load the pre-trained embedding.
with open(emb_file, "r", encoding="utf-8") as f:
# begin with a random embedding
embedding_np = np.random.uniform(-1, 1, size=(len(word_dict), emb_dim))
for line in f:
line = line.strip().split()
if len(line) != emb_dim + 1:
# skip this line if the embedding dimension does not match
continue
if line[0] in word_dict:
# find the word and replace its embedding with a pre-trained one
embedding_np[word_dict[line[0]]] = [float(i) for i in line[1:]]
# save and return the result
with open(emb_pkl, "wb") as f:
_pickle.dump(embedding_np, f)
return embedding_np
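A minimal usage sketch; the file names are hypothetical, and on the first call the combined matrix is cached to emb_pkl while later calls simply load the cache:

word_dict = {"the": 0, "cat": 1, "sat": 2}
emb = EmbedLoader.load_embedding(emb_dim=50, emb_file="glove.6B.50d.txt",
                                 word_dict=word_dict, emb_pkl="save/emb.pkl")
# emb: numpy array of shape (3, 50); words absent from emb_file keep their random init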

+ 2 - 2  fastNLP/loader/model_loader.py

@@ -8,8 +8,8 @@ class ModelLoader(BaseLoader):
Loader for models.
"""

def __init__(self, data_name, data_path):
super(ModelLoader, self).__init__(data_name, data_path)
def __init__(self, data_path):
super(ModelLoader, self).__init__(data_path)

@staticmethod
def load_pytorch(empty_model, model_path):


+ 6 - 6  fastNLP/models/cnn_text_classification.py

@@ -5,7 +5,7 @@ import torch
import torch.nn as nn

# import torch.nn.functional as F
from fastNLP.modules.encoder.conv_maxpool import ConvMaxpool
import fastNLP.modules.encoder as encoder


class CNNText(torch.nn.Module):
@@ -18,22 +18,22 @@ class CNNText(torch.nn.Module):
def __init__(self, args):
super(CNNText, self).__init__()

class_num = args["num_classes"]
num_classes = args["num_classes"]
kernel_nums = [100, 100, 100]
kernel_sizes = [3, 4, 5]
embed_num = args["vocab_size"]
vocab_size = args["vocab_size"]
embed_dim = 300
pretrained_embed = None
drop_prob = 0.5

# no support for pre-trained embedding currently
self.embed = nn.Embedding(embed_num, embed_dim, padding_idx=0)
self.conv_pool = ConvMaxpool(
self.embed = encoder.embedding.Embedding(vocab_size, embed_dim)
self.conv_pool = encoder.conv_maxpool.ConvMaxpool(
in_channels=embed_dim,
out_channels=kernel_nums,
kernel_sizes=kernel_sizes)
self.dropout = nn.Dropout(drop_prob)
self.fc = nn.Linear(sum(kernel_nums), class_num)
self.fc = encoder.linear.Linear(sum(kernel_nums), num_classes)

def forward(self, x):
x = self.embed(x) # [N,L] -> [N,L,C]
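A shape walk-through of the constructor above, as a sketch under the stated hyper-parameters (not the project's test code):

import torch
from fastNLP.models.cnn_text_classification import CNNText

model = CNNText({"num_classes": 5, "vocab_size": 100})
x = torch.randint(0, 100, (4, 20))  # [N=4, L=20] token ids
logits = model(x)
# embed: [4, 20] -> [4, 20, 300]; conv + max-pool over kernel sizes (3, 4, 5) -> [4, 300]; fc -> [4, 5]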


+ 2 - 1  fastNLP/modules/decoder/__init__.py

@@ -1,3 +1,4 @@
from .CRF import ConditionalRandomField
from .MLP import MLP

__all__ = ["ConditionalRandomField"]
__all__ = ["ConditionalRandomField", "MLP"]

+ 3 - 1  fastNLP/modules/encoder/__init__.py

@@ -2,8 +2,10 @@ from .embedding import Embedding
from .linear import Linear
from .lstm import Lstm
from .conv import Conv
from .conv_maxpool import ConvMaxpool

__all__ = ["Lstm",
"Embedding",
"Linear",
"Conv"]
"Conv",
"ConvMaxpool"]

+ 5 - 0  fastNLP/modules/encoder/conv_maxpool.py

@@ -4,6 +4,7 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.init import xavier_uniform_


class ConvMaxpool(nn.Module):
@@ -21,6 +22,7 @@ class ConvMaxpool(nn.Module):
if isinstance(kernel_sizes, int):
out_channels = [out_channels]
kernel_sizes = [kernel_sizes]

self.convs = nn.ModuleList([nn.Conv1d(
in_channels=in_channels,
out_channels=oc,
@@ -31,6 +33,9 @@ class ConvMaxpool(nn.Module):
groups=groups,
bias=bias)
for oc, ks in zip(out_channels, kernel_sizes)])

for conv in self.convs:
xavier_uniform_(conv.weight) # weight initialization
else:
raise Exception(
'Incorrect kernel sizes: should be list, tuple or int')


+ 1 - 1  fastNLP/modules/encoder/embedding.py

@@ -15,7 +15,7 @@ class Embedding(nn.Module):
def __init__(self, nums, dims, padding_idx=0, sparse=False, init_emb=None, dropout=0.0):
super(Embedding, self).__init__()
self.embed = nn.Embedding(nums, dims, padding_idx, sparse=sparse)
if init_emb:
if init_emb is not None:
self.embed.weight = nn.Parameter(init_emb)
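# note: `if init_emb:` would ask for the truth value of a Tensor, which is ambiguous
# (and raises) for multi-element tensors; comparing against None is the safe check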
self.dropout = nn.Dropout(dropout)



BIN  fastnlp-architecture.jpg (960 × 540, 36 kB)

+ 0 - 5331  reproduction/CNN-sentence_classification/rt-polaritydata/rt-polarity.neg (file diff suppressed because it is too large)


+ 0 - 5331  reproduction/CNN-sentence_classification/rt-polaritydata/rt-polarity.pos (file diff suppressed because it is too large)


BIN  reproduction/HAN-document_classification/data/test_samples.pkl


BIN  reproduction/HAN-document_classification/data/train_samples.pkl


BIN  reproduction/HAN-document_classification/data/yelp.word2vec


+ 0 - 114  reproduction/chinese_word_seg/cws_train.py

@@ -1,114 +0,0 @@
import sys

sys.path.append("..")

from fastNLP.loader.config_loader import ConfigLoader, ConfigSection
from fastNLP.core.trainer import SeqLabelTrainer
from fastNLP.loader.dataset_loader import TokenizeDatasetLoader, BaseLoader
from fastNLP.core.preprocess import SeqLabelPreprocess, load_pickle
from fastNLP.saver.model_saver import ModelSaver
from fastNLP.loader.model_loader import ModelLoader
from fastNLP.core.tester import SeqLabelTester
from fastNLP.models.sequence_modeling import SeqLabeling
from fastNLP.core.predictor import Predictor

data_name = "pku_training.utf8"
cws_data_path = "/home/zyfeng/data/pku_training.utf8"
pickle_path = "./save/"
data_infer_path = "/home/zyfeng/data/pku_test.utf8"


def infer():
# Load infer configuration, the same as test
test_args = ConfigSection()
ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS_test": test_args})

# fetch dictionary size and number of labels from pickle files
word2index = load_pickle(pickle_path, "word2id.pkl")
test_args["vocab_size"] = len(word2index)
index2label = load_pickle(pickle_path, "id2class.pkl")
test_args["num_classes"] = len(index2label)

# Define the same model
model = SeqLabeling(test_args)

# Dump trained parameters into the model
ModelLoader.load_pytorch(model, "./data_for_tests/saved_model.pkl")
print("model loaded!")

# Data Loader
raw_data_loader = BaseLoader(data_name, data_infer_path)
infer_data = raw_data_loader.load_lines()

# Inference interface
infer = Predictor(pickle_path)
results = infer.predict(model, infer_data)

print(results)
print("Inference finished!")


def train_test():
# Config Loader
train_args = ConfigSection()
test_args = ConfigSection()
ConfigLoader("good_name", "good_path").load_config("./cws.cfg", {"train": train_args, "test": test_args})

# Data Loader
loader = TokenizeDatasetLoader(data_name, cws_data_path)
train_data = loader.load_pku()

# Preprocessor
preprocess = SeqLabelPreprocess()
data_train, data_dev = preprocess.run(train_data, pickle_path=pickle_path, train_dev_split=0.3)
train_args["vocab_size"] = preprocess.vocab_size
train_args["num_classes"] = preprocess.num_classes

# Trainer
trainer = SeqLabelTrainer(train_args)

# Model
model = SeqLabeling(train_args)

# Start training
trainer.train(model, data_train, data_dev)
print("Training finished!")

# Saver
saver = ModelSaver("./save/saved_model.pkl")
saver.save_pytorch(model)
print("Model saved!")

# testing with validation set
test(data_dev)


def test(test_data):
# Config Loader
train_args = ConfigSection()
ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS": train_args})

# Define the same model
model = SeqLabeling(train_args)

# Dump trained parameters into the model
ModelLoader.load_pytorch(model, "./data_for_tests/saved_model.pkl")
print("model loaded!")

# Load test configuration
test_args = ConfigSection()
ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS_test": test_args})

# Tester
tester = SeqLabelTester(test_args)

# Start testing
tester.test(model, test_data)

# print test results
print(tester.show_matrices())
print("model tested!")


if __name__ == "__main__":
train_test()

+ 12 - 0  reproduction/chinese_word_segment/cws.cfg

@@ -31,4 +31,16 @@ pickle_path = "./save/"
use_crf = true
use_cuda = true
rnn_hidden_units = 100
word_emb_dim = 100

[model]
save_output = true
validate_in_training = true
save_dev_input = false
save_loss = true
batch_size = 640
pickle_path = "./save/"
use_crf = true
use_cuda = true
rnn_hidden_units = 100
word_emb_dim = 100

+ 25 - 20  reproduction/chinese_word_segment/run.py

@@ -1,33 +1,33 @@
import sys, os
import os
import sys

sys.path.append(os.path.join(os.path.dirname(__file__), '../..'))

from fastNLP.loader.config_loader import ConfigLoader, ConfigSection
from fastNLP.core.trainer import SeqLabelTrainer
from fastNLP.loader.dataset_loader import TokenizeDatasetLoader, BaseLoader
from fastNLP.loader.preprocess import POSPreprocess, load_pickle
from fastNLP.core.preprocess import SeqLabelPreprocess, load_pickle
from fastNLP.saver.model_saver import ModelSaver
from fastNLP.loader.model_loader import ModelLoader
from fastNLP.core.tester import SeqLabelTester
from fastNLP.models.sequence_modeling import AdvSeqLabel
from fastNLP.core.inference import SeqLabelInfer
from fastNLP.core.optimizer import SGD
from fastNLP.core.predictor import SeqLabelInfer

# chdir to this file's directory if the script is launched from elsewhere
if len(os.path.dirname(__file__)) != 0:
os.chdir(os.path.dirname(__file__))
datadir = 'icwb2-data'
cfgfile = 'cws.cfg'
datadir = "/home/zyfeng/data/"
cfgfile = './cws.cfg'
data_name = "pku_training.utf8"

cws_data_path = os.path.join(datadir, "training/pku_training.utf8")
cws_data_path = os.path.join(datadir, "pku_training.utf8")
pickle_path = "save"
data_infer_path = os.path.join(datadir, "infer.utf8")

def infer():
# Config Loader
test_args = ConfigSection()
ConfigLoader("config", "").load_config(cfgfile, {"POS_test": test_args})
ConfigLoader("config").load_config(cfgfile, {"POS_test": test_args})

# fetch dictionary size and number of labels from pickle files
word2index = load_pickle(pickle_path, "word2id.pkl")
@@ -47,7 +47,7 @@ def infer():
raise

# Data Loader
raw_data_loader = BaseLoader(data_name, data_infer_path)
raw_data_loader = BaseLoader(data_infer_path)
infer_data = raw_data_loader.load_lines()
print('data loaded')

@@ -63,19 +63,20 @@ def train():
# Config Loader
train_args = ConfigSection()
test_args = ConfigSection()
ConfigLoader("good_name", "good_path").load_config(cfgfile, {"train": train_args, "test": test_args})
ConfigLoader("good_path").load_config(cfgfile, {"train": train_args, "test": test_args})

# Data Loader
loader = TokenizeDatasetLoader(data_name, cws_data_path)
loader = TokenizeDatasetLoader(cws_data_path)
train_data = loader.load_pku()

# Preprocessor
p = POSPreprocess(train_data, pickle_path, train_dev_split=0.3)
train_args["vocab_size"] = p.vocab_size
train_args["num_classes"] = p.num_classes
preprocessor = SeqLabelPreprocess()
data_train, data_dev = preprocessor.run(train_data, pickle_path=pickle_path, train_dev_split=0.3)
train_args["vocab_size"] = preprocessor.vocab_size
train_args["num_classes"] = preprocessor.num_classes

# Trainer
trainer = SeqLabelTrainer(train_args)
trainer = SeqLabelTrainer(**train_args.data)

# Model
model = AdvSeqLabel(train_args)
@@ -83,10 +84,11 @@ def train():
ModelLoader.load_pytorch(model, "./save/saved_model.pkl")
print('model parameter loaded!')
except Exception as e:
print("No saved model. Continue.")
pass
# Start training
trainer.train(model)
trainer.train(model, data_train, data_dev)
print("Training finished!")

# Saver
@@ -98,7 +100,7 @@ def train():
def test():
# Config Loader
test_args = ConfigSection()
ConfigLoader("config", "").load_config(cfgfile, {"POS_test": test_args})
ConfigLoader("config").load_config(cfgfile, {"POS_test": test_args})

# fetch dictionary size and number of labels from pickle files
word2index = load_pickle(pickle_path, "word2id.pkl")
@@ -106,6 +108,9 @@ def test():
index2label = load_pickle(pickle_path, "id2class.pkl")
test_args["num_classes"] = len(index2label)

# load dev data
dev_data = load_pickle(pickle_path, "data_dev.pkl")

# Define the same model
model = AdvSeqLabel(test_args)

@@ -114,13 +119,13 @@ def test():
print("model loaded!")

# Tester
tester = SeqLabelTester(test_args)
tester = SeqLabelTester(**test_args.data)

# Start testing
tester.test(model)
tester.test(model, dev_data)

# print test results
print(tester.show_matrices())
print(tester.show_metrics())
print("model tested!")




reproduction/chinese_word_seg/cws.cfg → reproduction/pos_tag_model/pos_tag.cfg

@@ -1,29 +1,35 @@
[train]
epochs = 10
batch_size = 32
epochs = 30
batch_size = 64
pickle_path = "./save/"
validate = true
save_best_dev = true
model_saved_path = "./save/"
rnn_hidden_units = 100
rnn_layers = 2
rnn_bi_direction = true
word_emb_dim = 100
dropout = 0.5
use_crf = true
use_cuda = true
print_every_step = 10

[test]
save_output = true
validate_in_training = true
save_dev_input = false
save_loss = true
batch_size = 64
batch_size = 640
pickle_path = "./save/"
use_crf = true
use_cuda = true


[POS_test]
save_output = true
validate_in_training = true
save_dev_input = false
save_loss = true
batch_size = 640
pickle_path = "./save/"
rnn_hidden_units = 100
rnn_layers = 1
rnn_bi_direction = true
word_emb_dim = 100
dropout = 0.5
use_crf = true
use_cuda = true
rnn_hidden_units = 100
word_emb_dim = 100

+ 146 - 0  reproduction/pos_tag_model/train_pos_tag.py

@@ -0,0 +1,146 @@
import os
import sys

sys.path.append(os.path.join(os.path.dirname(__file__), '../..'))

from fastNLP.loader.config_loader import ConfigLoader, ConfigSection
from fastNLP.core.trainer import SeqLabelTrainer
from fastNLP.loader.dataset_loader import PeopleDailyCorpusLoader, BaseLoader
from fastNLP.core.preprocess import SeqLabelPreprocess, load_pickle
from fastNLP.saver.model_saver import ModelSaver
from fastNLP.loader.model_loader import ModelLoader
from fastNLP.core.tester import SeqLabelTester
from fastNLP.models.sequence_modeling import AdvSeqLabel
from fastNLP.core.predictor import SeqLabelInfer

# chdir to this file's directory if the script is launched from elsewhere
if len(os.path.dirname(__file__)) != 0:
os.chdir(os.path.dirname(__file__))
datadir = "/home/zyfeng/data/"
cfgfile = './pos_tag.cfg'
data_name = "CWS_POS_TAG_NER_people_daily.txt"

pos_tag_data_path = os.path.join(datadir, data_name)
pickle_path = "save"
data_infer_path = os.path.join(datadir, "infer.utf8")


def infer():
# Config Loader
test_args = ConfigSection()
ConfigLoader("config").load_config(cfgfile, {"POS_test": test_args})

# fetch dictionary size and number of labels from pickle files
word2index = load_pickle(pickle_path, "word2id.pkl")
test_args["vocab_size"] = len(word2index)
index2label = load_pickle(pickle_path, "id2class.pkl")
test_args["num_classes"] = len(index2label)

# Define the same model
model = AdvSeqLabel(test_args)

try:
ModelLoader.load_pytorch(model, "./save/saved_model.pkl")
print('model loaded!')
except Exception as e:
print('cannot load model!')
raise

# Data Loader
raw_data_loader = BaseLoader(data_infer_path)
infer_data = raw_data_loader.load_lines()
print('data loaded')

# Inference interface
infer = SeqLabelInfer(pickle_path)
results = infer.predict(model, infer_data)

print(results)
print("Inference finished!")


def train():
# Config Loader
train_args = ConfigSection()
test_args = ConfigSection()
ConfigLoader("good_name").load_config(cfgfile, {"train": train_args, "test": test_args})

# Data Loader
loader = PeopleDailyCorpusLoader(pos_tag_data_path)
train_data, _ = loader.load()

# Preprocessor
preprocessor = SeqLabelPreprocess()
data_train, data_dev = preprocessor.run(train_data, pickle_path=pickle_path, train_dev_split=0.3)
train_args["vocab_size"] = preprocessor.vocab_size
train_args["num_classes"] = preprocessor.num_classes

# Trainer
trainer = SeqLabelTrainer(**train_args.data)

# Model
model = AdvSeqLabel(train_args)
try:
ModelLoader.load_pytorch(model, "./save/saved_model.pkl")
print('model parameter loaded!')
except Exception as e:
print("No saved model. Continue.")
pass

# Start training
trainer.train(model, data_train, data_dev)
print("Training finished!")

# Saver
saver = ModelSaver("./save/saved_model.pkl")
saver.save_pytorch(model)
print("Model saved!")


def test():
# Config Loader
test_args = ConfigSection()
ConfigLoader("config").load_config(cfgfile, {"POS_test": test_args})

# fetch dictionary size and number of labels from pickle files
word2index = load_pickle(pickle_path, "word2id.pkl")
test_args["vocab_size"] = len(word2index)
index2label = load_pickle(pickle_path, "id2class.pkl")
test_args["num_classes"] = len(index2label)

# load dev data
dev_data = load_pickle(pickle_path, "data_dev.pkl")

# Define the same model
model = AdvSeqLabel(test_args)

# Dump trained parameters into the model
ModelLoader.load_pytorch(model, "./save/saved_model.pkl")
print("model loaded!")

# Tester
tester = SeqLabelTester(**test_args.data)

# Start testing
tester.test(model, dev_data)

# print test results
print(tester.show_metrics())
print("model tested!")


if __name__ == "__main__":
import argparse

parser = argparse.ArgumentParser(description='Run a Chinese POS-tagging model')
parser.add_argument('--mode', help='set the running mode', choices=['train', 'test', 'infer'])
args = parser.parse_args()
if args.mode == 'train':
train()
elif args.mode == 'test':
test()
elif args.mode == 'infer':
infer()
else:
print('no mode specified for model!')
parser.print_help()

+ 1 - 0  requirements.txt

@@ -1,3 +1,4 @@
numpy>=1.14.2
torch==0.4.0
torchvision>=0.1.8
tensorboardX

+ 24 - 0  setup.py

@@ -0,0 +1,24 @@
#!/usr/bin/env python
# coding=utf-8
from setuptools import setup, find_packages

with open('README.md') as f:
readme = f.read()

with open('LICENSE') as f:
license = f.read()

with open('requirements.txt') as f:
reqs = f.read()

setup(
name='fastNLP',
version='0.0.1',
description='fastNLP: Deep Learning Toolkit for NLP, developed by Fudan FastNLP Team',
long_description=readme,
license=license,
author='fudanNLP',
python_requires='>=3.5',
packages=find_packages(),
install_requires=reqs.strip().split('\n'),
)

+ 1 - 2  test/core/test_action.py

@@ -1,9 +1,8 @@
import os

import unittest

from fastNLP.core.action import Action, Batchifier, SequentialSampler


class TestAction(unittest.TestCase):
def test_case_1(self):
x = [1, 2, 3, 4, 5, 6, 7, 8]


+ 2 - 0  test/loader/test_loader.py

@@ -33,8 +33,10 @@ class TestConfigLoader(unittest.TestCase):

test_arg = ConfigSection()
ConfigLoader("config").load_config(os.path.join("./test/loader", "config"), {"test": test_arg})

section = read_section_from_config(os.path.join("./test/loader", "config"), "test")


for sec in section:
if (sec not in test_arg) or (section[sec] != test_arg[sec]):
raise AttributeError("ERROR")


+ 0 - 138  test/ner.py

@@ -1,138 +0,0 @@
import _pickle
import os

import numpy as np
import torch

from fastNLP.core.preprocess import SeqLabelPreprocess
from fastNLP.core.tester import SeqLabelTester
from fastNLP.core.trainer import SeqLabelTrainer
from fastNLP.models.sequence_modeling import AdvSeqLabel


class MyNERTrainer(SeqLabelTrainer):
def __init__(self, train_args):
super(MyNERTrainer, self).__init__(train_args)
self.scheduler = None

def define_optimizer(self):
"""
override
:return:
"""
self.optimizer = torch.optim.Adam(self._model.parameters(), lr=0.001)
self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=3000, gamma=0.5)

def update(self):
"""
override
:return:
"""
self.optimizer.step()
self.scheduler.step()

def _create_validator(self, valid_args):
return MyNERTester(valid_args)

def best_eval_result(self, validator):
accuracy = validator.metrics()
if accuracy > self.best_accuracy:
self.best_accuracy = accuracy
return True
else:
return False


class MyNERTester(SeqLabelTester):
def __init__(self, test_args):
super(MyNERTester, self).__init__(test_args)

def _evaluate(self, prediction, batch_y, seq_len):
"""
:param prediction: [batch_size, seq_len, num_classes]
:param batch_y: [batch_size, seq_len]
:param seq_len: [batch_size]
:return:
"""
summ = 0
correct = 0
_, indices = torch.max(prediction, 2)
for p, y, l in zip(indices, batch_y, seq_len):
summ += l
correct += np.sum(p[:l].cpu().numpy() == y[:l].cpu().numpy())
return float(correct / summ)

def evaluate(self, predict, truth):
return self._evaluate(predict, truth, self.seq_len)

def metrics(self):
return np.mean(self.eval_history)

def show_matrices(self):
return "dev accuracy={:.2f}".format(float(self.metrics()))


def embedding_process(emb_file, word_dict, emb_dim, emb_pkl):
if os.path.exists(emb_pkl):
with open(emb_pkl, "rb") as f:
embedding_np = _pickle.load(f)
return embedding_np
with open(emb_file, "r", encoding="utf-8") as f:
embedding_np = np.random.uniform(-1, 1, size=(len(word_dict), emb_dim))
for line in f:
line = line.strip().split()
if len(line) != emb_dim + 1:
continue
if line[0] in word_dict:
embedding_np[word_dict[line[0]]] = [float(i) for i in line[1:]]
with open(emb_pkl, "wb") as f:
_pickle.dump(embedding_np, f)
return embedding_np


def data_load(data_file):
with open(data_file, "r", encoding="utf-8") as f:
all_data = []
sent = []
label = []
for line in f:
line = line.strip().split()

if not len(line) <= 1:
sent.append(line[0])
label.append(line[1])
else:
all_data.append([sent, label])
sent = []
label = []
return all_data


data_path = "data_for_tests/people.txt"
pick_path = "data_for_tests/"
emb_path = "data_for_tests/emb50.txt"
save_path = "data_for_tests/"
if __name__ == "__main__":
data = data_load(data_path)
preprocess = SeqLabelPreprocess()
data_train, data_dev = preprocess.run(data, pickle_path=pick_path, train_dev_split=0.3)
# emb = embedding_process(emb_path, p.word2index, 50, os.path.join(pick_path, "embedding.pkl"))
emb = None
args = {"epochs": 20,
"batch_size": 1,
"pickle_path": pick_path,
"validate": True,
"save_best_dev": True,
"model_saved_path": save_path,
"use_cuda": True,

"vocab_size": preprocess.vocab_size,
"num_classes": preprocess.num_classes,
"word_emb_dim": 50,
"rnn_hidden_units": 100
}
# emb = torch.Tensor(emb).float().cuda()
networks = AdvSeqLabel(args, emb)
trainer = MyNERTrainer(args)
trainer.train(networks, data_train, data_dev)
print("Training finished!")

+ 0 - 129  test/ner_decode.py

@@ -1,129 +0,0 @@
import _pickle
import os

import torch

from fastNLP.core.predictor import SeqLabelInfer
from fastNLP.core.trainer import SeqLabelTrainer
from fastNLP.loader.model_loader import ModelLoader
from fastNLP.models.sequence_modeling import AdvSeqLabel


class Decode(SeqLabelTrainer):
def __init__(self, args):
super(Decode, self).__init__(args)

def decoder(self, network, sents, model_path):
self.model = network
self.model.load_state_dict(torch.load(model_path))
out_put = []
self.mode(network, test=True)
for batch_x in sents:
prediction = self.data_forward(self.model, batch_x)

seq_tag = self.model.prediction(prediction, batch_x[1])

out_put.append(list(seq_tag)[0])
return out_put


def process_sent(sents, word2id):
sents_num = []
for s in sents:
sent_num = []
for c in s:
if c in word2id:
sent_num.append(word2id[c])
else:
sent_num.append(word2id["<unk>"])
sents_num.append(([sent_num], [len(sent_num)])) # batch_size is 1

return sents_num


def process_tag(sents, tags, id2class):
Tags = []
for ttt in tags:
Tags.append([id2class[t] for t in ttt])

Segs = []
PosNers = []
for sent, tag in zip(sents, tags):
word__ = []
lll__ = []
for c, t in zip(sent, tag):

t = id2class[t]
l = t.split("-")
split_ = l[0]
pn = l[1]

if split_ == "S":
word__.append(c)
lll__.append(pn)
word_1 = ""
elif split_ == "E":
word_1 += c
word__.append(word_1)
lll__.append(pn)
word_1 = ""
elif split_ == "B":
word_1 = ""
word_1 += c
else:
word_1 += c
Segs.append(word__)
PosNers.append(lll__)
return Segs, PosNers


pickle_path = "data_for_tests/"
model_path = "data_for_tests/model_best_dev.pkl"
if __name__ == "__main__":

with open(os.path.join(pickle_path, "id2word.pkl"), "rb") as f:
id2word = _pickle.load(f)
with open(os.path.join(pickle_path, "word2id.pkl"), "rb") as f:
word2id = _pickle.load(f)
with open(os.path.join(pickle_path, "id2class.pkl"), "rb") as f:
id2class = _pickle.load(f)

sent = ["中共中央总书记、国家主席江泽民",
"逆向处理输入序列并返回逆序后的序列"] # here is input

args = {"epochs": 1,
"batch_size": 1,
"pickle_path": "data_for_tests/",
"validate": True,
"save_best_dev": True,
"model_saved_path": "data_for_tests/",
"use_cuda": False,

"vocab_size": len(word2id),
"num_classes": len(id2class),
"word_emb_dim": 50,
"rnn_hidden_units": 100,
}
"""
network = AdvSeqLabel(args, None)
decoder_ = Decode(args)
tags_num = decoder_.decoder(network, process_sent(sent, word2id), model_path=model_path)
output_seg, output_pn = process_tag(sent, tags_num, id2class) # here is output
print(output_seg)
print(output_pn)
"""
# Define the same model
model = AdvSeqLabel(args, None)

# Dump trained parameters into the model
ModelLoader.load_pytorch(model, "./data_for_tests/model_best_dev.pkl")
print("model loaded!")

# Inference interface
infer = SeqLabelInfer(pickle_path)
sent = [[ch for ch in s] for s in sent]
results = infer.predict(model, sent)

for res in results:
print(res)
print("Inference finished!")

+ 23 - 27  test/readme_example.py

@@ -1,19 +1,13 @@
# python: 3.5
# pytorch: 0.4

################
# Test cross validation.
################

from fastNLP.loader.preprocess import ClassPreprocess

from fastNLP.core.loss import Loss
from fastNLP.core.optimizer import Optimizer
from fastNLP.core.predictor import ClassificationInfer
from fastNLP.core.preprocess import ClassPreprocess
from fastNLP.core.trainer import ClassificationTrainer
from fastNLP.loader.dataset_loader import ClassDatasetLoader
from fastNLP.models.base_model import BaseModel
from fastNLP.modules import aggregation
from fastNLP.modules import encoder
from fastNLP.modules import decoder
from fastNLP.modules import encoder


class ClassificationModel(BaseModel):
@@ -28,7 +22,7 @@ class ClassificationModel(BaseModel):
self.enc = encoder.Conv(
in_channels=300, out_channels=100, kernel_size=3)
self.agg = aggregation.MaxPool()
self.dec = decoder.MLP(100, num_classes=num_classes)
self.dec = decoder.MLP(size_layer=[100, num_classes])

def forward(self, x):
x = self.emb(x) # [N,L] -> [N,L,C]
@@ -38,18 +32,17 @@ class ClassificationModel(BaseModel):
return x


data_dir = 'data' # directory to save data and model
train_path = 'test/data_for_tests/text_classify.txt' # training set file
data_dir = 'save/' # directory to save data and model
train_path = './data_for_tests/text_classify.txt' # training set file

# load dataset
ds_loader = ClassDatasetLoader("train", train_path)
ds_loader = ClassDatasetLoader(train_path)
data = ds_loader.load()

# pre-process dataset
pre = ClassPreprocess(data, data_dir, cross_val=True, n_fold=5)
# pre = ClassPreprocess(data, data_dir)
n_classes = pre.num_classes
vocab_size = pre.vocab_size
pre = ClassPreprocess()
train_set, dev_set = pre.run(data, train_dev_split=0.3, pickle_path=data_dir)
n_classes, vocab_size = pre.num_classes, pre.vocab_size

# construct model
model_args = {
@@ -58,22 +51,25 @@ model_args = {
}
model = ClassificationModel(num_classes=n_classes, vocab_size=vocab_size)

# train model
# construct trainer
train_args = {
"epochs": 10,
"batch_size": 50,
"epochs": 3,
"batch_size": 16,
"pickle_path": data_dir,
"validate": False,
"save_best_dev": False,
"model_saved_path": None,
"use_cuda": True,
"learn_rate": 1e-3,
"momentum": 0.9}
trainer = ClassificationTrainer(train_args)
# trainer.train(model, ['data_train.pkl', 'data_dev.pkl'])
trainer.cross_validate(model)
"loss": Loss("cross_entropy"),
"optimizer": Optimizer("Adam", lr=0.001)
}
trainer = ClassificationTrainer(**train_args)

# start training
trainer.train(model, train_data=train_set, dev_data=dev_set)

# predict using model
data_infer = [x[0] for x in data]
infer = ClassificationInfer(data_dir)
labels_pred = infer.predict(model, data_infer)
labels_pred = infer.predict(model.cpu(), data_infer)
print(labels_pred)

+ 8 - 8  test/seq_labeling.py

@@ -33,7 +33,7 @@ data_infer_path = args.infer
def infer():
# Load infer configuration, the same as test
test_args = ConfigSection()
ConfigLoader("config.cfg", "").load_config(config_dir, {"POS_infer": test_args})
ConfigLoader("config.cfg").load_config(config_dir, {"POS_infer": test_args})

# fetch dictionary size and number of labels from pickle files
word2index = load_pickle(pickle_path, "word2id.pkl")
@@ -49,7 +49,7 @@ def infer():
print("model loaded!")

# Data Loader
raw_data_loader = BaseLoader("xxx", data_infer_path)
raw_data_loader = BaseLoader(data_infer_path)
infer_data = raw_data_loader.load_lines()

# Inference interface
@@ -65,11 +65,11 @@ def train_and_test():
# Config Loader
trainer_args = ConfigSection()
model_args = ConfigSection()
ConfigLoader("config.cfg", "").load_config(config_dir, {
ConfigLoader("config.cfg").load_config(config_dir, {
"test_seq_label_trainer": trainer_args, "test_seq_label_model": model_args})

# Data Loader
pos_loader = POSDatasetLoader("xxx", data_path)
pos_loader = POSDatasetLoader(data_path)
train_data = pos_loader.load_lines()

# Preprocessor
@@ -117,13 +117,13 @@ def train_and_test():

# Load test configuration
tester_args = ConfigSection()
ConfigLoader("config.cfg", "").load_config(config_dir, {"test_seq_label_tester": tester_args})
ConfigLoader("config.cfg").load_config(config_dir, {"test_seq_label_tester": tester_args})

# Tester
tester = SeqLabelTester(save_output=False,
save_loss=False,
save_best_dev=False,
batch_size=8,
batch_size=4,
use_cuda=False,
pickle_path=pickle_path,
model_name="seq_label_in_test.pkl",
@@ -134,10 +134,10 @@ def train_and_test():
tester.test(model, data_dev)

# print test results
print(tester.show_matrices())
print(tester.show_metrics())
print("model tested!")


if __name__ == "__main__":
train_and_test()
# train_and_test()
infer()

+ 8 - 8  test/test_cws.py

@@ -22,7 +22,7 @@ data_infer_path = "data_for_tests/people_infer.txt"
def infer():
# Load infer configuration, the same as test
test_args = ConfigSection()
ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS_test": test_args})
ConfigLoader("config.cfg").load_config("./data_for_tests/config", {"POS_test": test_args})

# fetch dictionary size and number of labels from pickle files
word2index = load_pickle(pickle_path, "word2id.pkl")
@@ -38,7 +38,7 @@ def infer():
print("model loaded!")

# Data Loader
raw_data_loader = BaseLoader(data_name, data_infer_path)
raw_data_loader = BaseLoader(data_infer_path)
infer_data = raw_data_loader.load_lines()
"""
Transform strings into list of list of strings.
@@ -61,10 +61,10 @@ def infer():
def train_test():
# Config Loader
train_args = ConfigSection()
ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS": train_args})
ConfigLoader("config.cfg").load_config("./data_for_tests/config", {"POS": train_args})

# Data Loader
loader = TokenizeDatasetLoader(data_name, cws_data_path)
loader = TokenizeDatasetLoader(cws_data_path)
train_data = loader.load_pku()

# Preprocessor
@@ -74,7 +74,7 @@ def train_test():
train_args["num_classes"] = p.num_classes

# Trainer
trainer = SeqLabelTrainer(train_args)
trainer = SeqLabelTrainer(**train_args.data)

# Model
model = SeqLabeling(train_args)
@@ -99,16 +99,16 @@ def train_test():

# Load test configuration
test_args = ConfigSection()
ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS_test": test_args})
ConfigLoader("config.cfg").load_config("./data_for_tests/config", {"POS_test": test_args})

# Tester
tester = SeqLabelTester(test_args)
tester = SeqLabelTester(**test_args.data)

# Start testing
tester.test(model, data_train)

# print test results
print(tester.show_matrices())
print(tester.show_metrics())
print("model tested!")




+ 69 - 7  test/test_fastNLP.py

@@ -1,13 +1,27 @@
import sys

sys.path.append("..")
from fastNLP.fastnlp import FastNLP
from fastNLP.fastnlp import interpret_word_seg_results, interpret_cws_pos_results

PATH_TO_CWS_PICKLE_FILES = "/home/zyfeng/fastNLP/reproduction/chinese_word_segment/save/"
PATH_TO_POS_TAG_PICKLE_FILES = "/home/zyfeng/data/crf_seg/"
PATH_TO_TEXT_CLASSIFICATION_PICKLE_FILES = "/home/zyfeng/data/text_classify/"

def word_seg():
nlp = FastNLP("./data_for_tests/")
nlp.load("seq_label_model")
text = "这是最好的基于深度学习的中文分词系统。"
result = nlp.run(text)
print(result)
print("FastNLP finished!")
nlp = FastNLP(model_dir=PATH_TO_CWS_PICKLE_FILES)
nlp.load("cws_basic_model", config_file="cws.cfg", section_name="POS_test")
text = ["这是最好的基于深度学习的中文分词系统。",
"大王叫我来巡山。",
"我党多年来致力于改善人民生活水平。"]
results = nlp.run(text)
print(results)
for example in results:
words, labels = [], []
for res in example:
words.append(res[0])
labels.append(res[1])
print(interpret_word_seg_results(words, labels))


def text_class():
@@ -19,5 +33,53 @@ def text_class():
print("FastNLP finished!")


def test_word_seg_interpret():
foo = [[('这', 'S'), ('是', 'S'), ('最', 'S'), ('好', 'S'), ('的', 'S'), ('基', 'B'), ('于', 'E'), ('深', 'B'), ('度', 'E'),
('学', 'B'), ('习', 'E'), ('的', 'S'), ('中', 'B'), ('文', 'E'), ('分', 'B'), ('词', 'E'), ('系', 'B'), ('统', 'E'),
('。', 'S')]]
chars = [x[0] for x in foo[0]]
labels = [x[1] for x in foo[0]]
print(interpret_word_seg_results(chars, labels))


def test_interpret_cws_pos_results():
foo = [
[('这', 'S-r'), ('是', 'S-v'), ('最', 'S-d'), ('好', 'S-a'), ('的', 'S-u'), ('基', 'B-p'), ('于', 'E-p'), ('深', 'B-d'),
('度', 'E-d'), ('学', 'B-v'), ('习', 'E-v'), ('的', 'S-u'), ('中', 'B-nz'), ('文', 'E-nz'), ('分', 'B-vn'),
('词', 'E-vn'), ('系', 'B-n'), ('统', 'E-n'), ('。', 'S-w')]
]
chars = [x[0] for x in foo[0]]
labels = [x[1] for x in foo[0]]
print(interpret_cws_pos_results(chars, labels))


def pos_tag():
nlp = FastNLP(model_dir=PATH_TO_POS_TAG_PICKLE_FILES)
nlp.load("pos_tag_model", config_file="pos_tag.config", section_name="pos_tag_model")
text = ["这是最好的基于深度学习的中文分词系统。",
"大王叫我来巡山。",
"我党多年来致力于改善人民生活水平。"]
results = nlp.run(text)
for example in results:
words, labels = [], []
for res in example:
words.append(res[0])
labels.append(res[1])
print(interpret_cws_pos_results(words, labels))


def text_classify():
nlp = FastNLP(model_dir=PATH_TO_TEXT_CLASSIFICATION_PICKLE_FILES)
nlp.load("text_classify_model", config_file="text_classify.cfg", section_name="model")
text = [
"世界物联网大会明日在京召开龙头股启动在即",
"乌鲁木齐市新增一处城市中心旅游目的地",
"朱元璋的大明朝真的源于明教吗?——告诉你一个真实的“明教”"]
results = nlp.run(text)
print(results)
"""
['finance', 'travel', 'history']
"""

if __name__ == "__main__":
text_class()
text_classify()

+ 7 - 7  test/test_tester.py

@@ -5,19 +5,19 @@ from fastNLP.loader.dataset_loader import TokenizeDatasetLoader
from fastNLP.models.sequence_modeling import SeqLabeling

data_name = "pku_training.utf8"
cws_data_path = "/home/zyfeng/Desktop/data/pku_training.utf8"
pickle_path = "data_for_tests"


def foo():
loader = TokenizeDatasetLoader(data_name, "./data_for_tests/cws_pku_utf_8")
loader = TokenizeDatasetLoader("./data_for_tests/cws_pku_utf_8")
train_data = loader.load_pku()

train_args = ConfigSection()
ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS": train_args})
ConfigLoader("config.cfg").load_config("./data_for_tests/config", {"POS": train_args})

# Preprocessor
p = SeqLabelPreprocess(train_data, pickle_path)
p = SeqLabelPreprocess()
train_data = p.run(train_data)
train_args["vocab_size"] = p.vocab_size
train_args["num_classes"] = p.num_classes

@@ -26,11 +26,11 @@ def foo():
valid_args = {"save_output": True, "validate_in_training": True, "save_dev_input": True,
"save_loss": True, "batch_size": 8, "pickle_path": "./data_for_tests/",
"use_cuda": True}
validator = SeqLabelTester(valid_args)
validator = SeqLabelTester(**valid_args)

print("start validation.")
validator.test(model)
print(validator.show_matrices())
validator.test(model, train_data)
print(validator.show_metrics())


if __name__ == "__main__":


+ 2 - 2  test/text_classify.py

@@ -34,7 +34,7 @@ config_dir = args.config
def infer():
# load dataset
print("Loading data...")
ds_loader = ClassDatasetLoader("train", train_data_dir)
ds_loader = ClassDatasetLoader(train_data_dir)
data = ds_loader.load()
unlabeled_data = [x[0] for x in data]

@@ -69,7 +69,7 @@ def train():

# load dataset
print("Loading data...")
ds_loader = ClassDatasetLoader("train", train_data_dir)
ds_loader = ClassDatasetLoader(train_data_dir)
data = ds_loader.load()
print(data[0])


