Browse Source

Merge branch 'master' into pos_tag_service

tags/v0.1.0
Coet GitHub 6 years ago
parent
commit
a13b8e5c82
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 280 additions and 353 deletions
  1. +4
    -86
      README.md
  2. +4
    -3
      docs/requirements.txt
  3. +2
    -0
      docs/source/conf.py
  4. +26
    -34
      docs/source/fastNLP.core.rst
  5. +12
    -38
      docs/source/fastNLP.loader.rst
  6. +10
    -26
      docs/source/fastNLP.models.rst
  7. +12
    -30
      docs/source/fastNLP.modules.aggregation.rst
  8. +9
    -13
      docs/source/fastNLP.modules.decoder.rst
  9. +18
    -42
      docs/source/fastNLP.modules.encoder.rst
  10. +2
    -7
      docs/source/fastNLP.modules.interaction.rst
  11. +6
    -21
      docs/source/fastNLP.modules.rst
  12. +4
    -17
      docs/source/fastNLP.rst
  13. +6
    -18
      docs/source/fastNLP.saver.rst
  14. BIN
      docs/source/figures/procedures_and_sequence_labeling.png
  15. BIN
      docs/source/figures/text_classification.png
  16. +47
    -9
      docs/source/index.rst
  17. +0
    -7
      docs/source/modules.rst
  18. +31
    -0
      docs/source/user/installation.rst
  19. +84
    -0
      docs/source/user/quickstart.rst
  20. +2
    -1
      fastNLP/core/preprocess.py
  21. +1
    -1
      fastNLP/modules/encoder/embedding.py

+ 4
- 86
README.md View File

@@ -2,6 +2,9 @@


[![Build Status](https://travis-ci.org/fastnlp/fastNLP.svg?branch=master)](https://travis-ci.org/fastnlp/fastNLP) [![Build Status](https://travis-ci.org/fastnlp/fastNLP.svg?branch=master)](https://travis-ci.org/fastnlp/fastNLP)
[![codecov](https://codecov.io/gh/fastnlp/fastNLP/branch/master/graph/badge.svg)](https://codecov.io/gh/fastnlp/fastNLP) [![codecov](https://codecov.io/gh/fastnlp/fastNLP/branch/master/graph/badge.svg)](https://codecov.io/gh/fastnlp/fastNLP)
[![PyPI version](https://badge.fury.io/py/fastNLP.svg)](https://badge.fury.io/py/fastNLP)
![Hex.pm](https://img.shields.io/hexpm/l/plug.svg)
[![Documentation Status](https://readthedocs.org/projects/fastnlp/badge/?version=latest)](http://fastnlp.readthedocs.io/?badge=latest)


fastNLP is a modular Natural Language Processing system based on PyTorch, for fast development of NLP tools. It divides the NLP model based on deep learning into different modules. These modules fall into 4 categories: encoder, interaction, aggregation and decoder, while each category contains different implemented modules. Encoder modules encode the input into some abstract representation, interaction modules make the information in the representation interact with each other, aggregation modules aggregate and reduce information, and decoder modules decode the representation into the output. Most current NLP models could be built on these modules, which vastly simplifies the process of developing NLP models. The architecture of fastNLP is as the figure below: fastNLP is a modular Natural Language Processing system based on PyTorch, for fast development of NLP tools. It divides the NLP model based on deep learning into different modules. These modules fall into 4 categories: encoder, interaction, aggregation and decoder, while each category contains different implemented modules. Encoder modules encode the input into some abstract representation, interaction modules make the information in the representation interact with each other, aggregation modules aggregate and reduce information, and decoder modules decode the representation into the output. Most current NLP models could be built on these modules, which vastly simplifies the process of developing NLP models. The architecture of fastNLP is as the figure below:


@@ -17,94 +20,9 @@ fastNLP is a modular Natural Language Processing system based on PyTorch, for fa


## Resources ## Resources


- [Documentation](https://github.com/fastnlp/fastNLP)
- [Documentation](https://fastnlp.readthedocs.io/en/latest/)
- [Source Code](https://github.com/fastnlp/fastNLP) - [Source Code](https://github.com/fastnlp/fastNLP)



## Example

### Basic Usage

A typical fastNLP routine is composed of four phases: loading dataset, pre-processing data, constructing model and training model.
```python
from fastNLP.core.preprocess import ClassPreprocess
from fastNLP.core.predictor import ClassificationInfer
from fastNLP.core.trainer import ClassificationTrainer
from fastNLP.loader.dataset_loader import ClassDatasetLoader
from fastNLP.models.base_model import BaseModel
from fastNLP.modules import aggregation
from fastNLP.modules import encoder
from fastNLP.modules import decoder
from fastNLP.core.loss import Loss
from fastNLP.core.optimizer import Optimizer


class ClassificationModel(BaseModel):
"""
Simple text classification model based on CNN.
"""

def __init__(self, num_classes, vocab_size):
super(ClassificationModel, self).__init__()

self.emb = encoder.Embedding(nums=vocab_size, dims=300)
self.enc = encoder.Conv(
in_channels=300, out_channels=100, kernel_size=3)
self.agg = aggregation.MaxPool()
self.dec = decoder.MLP(size_layer=[100, num_classes])

def forward(self, x):
x = self.emb(x) # [N,L] -> [N,L,C]
x = self.enc(x) # [N,L,C_in] -> [N,L,C_out]
x = self.agg(x) # [N,L,C] -> [N,C]
x = self.dec(x) # [N,C] -> [N, N_class]
return x


data_dir = 'save/' # directory to save data and model
train_path = './data_for_tests/text_classify.txt' # training set file

# load dataset
ds_loader = ClassDatasetLoader(train_path)
data = ds_loader.load()

# pre-process dataset
pre = ClassPreprocess()
train_set, dev_set = pre.run(data, train_dev_split=0.3, pickle_path=data_dir)
n_classes, vocab_size = pre.num_classes, pre.vocab_size

# construct model
model_args = {
'num_classes': n_classes,
'vocab_size': vocab_size
}
model = ClassificationModel(num_classes=n_classes, vocab_size=vocab_size)

# construct trainer
train_args = {
"epochs": 3,
"batch_size": 16,
"pickle_path": data_dir,
"validate": False,
"save_best_dev": False,
"model_saved_path": None,
"use_cuda": True,
"loss": Loss("cross_entropy"),
"optimizer": Optimizer("Adam", lr=0.001)
}
trainer = ClassificationTrainer(**train_args)

# start training
trainer.train(model, train_data=train_set, dev_data=dev_set)

# predict using model
data_infer = [x[0] for x in data]
infer = ClassificationInfer(data_dir)
labels_pred = infer.predict(model.cpu(), data_infer)
print(labels_pred)
```


## Installation ## Installation
Run the following commands to install fastNLP package. Run the following commands to install fastNLP package.
```shell ```shell


+ 4
- 3
docs/requirements.txt View File

@@ -1,3 +1,4 @@
sphinx
-e git://github.com/snide/sphinx_rtd_theme.git#egg=sphinx_rtd_theme
sphinxcontrib.katex
numpy>=1.14.2
http://download.pytorch.org/whl/cpu/torch-0.4.1-cp35-cp35m-linux_x86_64.whl
torchvision>=0.1.8
sphinx-rtd-theme==0.4.1

+ 2
- 0
docs/source/conf.py View File

@@ -42,6 +42,8 @@ release = '1.0'
extensions = [ extensions = [
'sphinx.ext.autodoc', 'sphinx.ext.autodoc',
'sphinx.ext.viewcode', 'sphinx.ext.viewcode',
'sphinx.ext.autosummary',

] ]


# Add any paths that contain templates here, relative to this directory. # Add any paths that contain templates here, relative to this directory.


+ 26
- 34
docs/source/fastNLP.core.rst View File

@@ -1,62 +1,54 @@
fastNLP.core package
====================
fastNLP.core
=============


Submodules
----------

fastNLP.core.action module
--------------------------
fastNLP.core.action
--------------------


.. automodule:: fastNLP.core.action .. automodule:: fastNLP.core.action
:members: :members:
:undoc-members:
:show-inheritance:


fastNLP.core.metrics module
---------------------------
fastNLP.core.loss
------------------

.. automodule:: fastNLP.core.loss
:members:

fastNLP.core.metrics
---------------------


.. automodule:: fastNLP.core.metrics .. automodule:: fastNLP.core.metrics
:members: :members:
:undoc-members:
:show-inheritance:


fastNLP.core.optimizer module
-----------------------------
fastNLP.core.optimizer
-----------------------


.. automodule:: fastNLP.core.optimizer .. automodule:: fastNLP.core.optimizer
:members: :members:
:undoc-members:
:show-inheritance:


fastNLP.core.predictor module
-----------------------------
fastNLP.core.predictor
-----------------------


.. automodule:: fastNLP.core.predictor .. automodule:: fastNLP.core.predictor
:members: :members:
:undoc-members:
:show-inheritance:


fastNLP.core.tester module
--------------------------
fastNLP.core.preprocess
------------------------

.. automodule:: fastNLP.core.preprocess
:members:

fastNLP.core.tester
--------------------


.. automodule:: fastNLP.core.tester .. automodule:: fastNLP.core.tester
:members: :members:
:undoc-members:
:show-inheritance:


fastNLP.core.trainer module
---------------------------
fastNLP.core.trainer
---------------------


.. automodule:: fastNLP.core.trainer .. automodule:: fastNLP.core.trainer
:members: :members:
:undoc-members:
:show-inheritance:



Module contents
---------------


.. automodule:: fastNLP.core .. automodule:: fastNLP.core
:members: :members:
:undoc-members:
:show-inheritance:

+ 12
- 38
docs/source/fastNLP.loader.rst View File

@@ -1,62 +1,36 @@
fastNLP.loader package
======================
fastNLP.loader
===============


Submodules
----------

fastNLP.loader.base\_loader module
----------------------------------
fastNLP.loader.base\_loader
----------------------------


.. automodule:: fastNLP.loader.base_loader .. automodule:: fastNLP.loader.base_loader
:members: :members:
:undoc-members:
:show-inheritance:


fastNLP.loader.config\_loader module
------------------------------------
fastNLP.loader.config\_loader
------------------------------


.. automodule:: fastNLP.loader.config_loader .. automodule:: fastNLP.loader.config_loader
:members: :members:
:undoc-members:
:show-inheritance:


fastNLP.loader.dataset\_loader module
-------------------------------------
fastNLP.loader.dataset\_loader
-------------------------------


.. automodule:: fastNLP.loader.dataset_loader .. automodule:: fastNLP.loader.dataset_loader
:members: :members:
:undoc-members:
:show-inheritance:


fastNLP.loader.embed\_loader module
-----------------------------------
fastNLP.loader.embed\_loader
-----------------------------


.. automodule:: fastNLP.loader.embed_loader .. automodule:: fastNLP.loader.embed_loader
:members: :members:
:undoc-members:
:show-inheritance:


fastNLP.loader.model\_loader module
-----------------------------------
fastNLP.loader.model\_loader
-----------------------------


.. automodule:: fastNLP.loader.model_loader .. automodule:: fastNLP.loader.model_loader
:members: :members:
:undoc-members:
:show-inheritance:

fastNLP.loader.preprocess module
--------------------------------

.. automodule:: fastNLP.loader.preprocess
:members:
:undoc-members:
:show-inheritance:



Module contents
---------------


.. automodule:: fastNLP.loader .. automodule:: fastNLP.loader
:members: :members:
:undoc-members:
:show-inheritance:

+ 10
- 26
docs/source/fastNLP.models.rst View File

@@ -1,46 +1,30 @@
fastNLP.models package
======================
fastNLP.models
===============


Submodules
----------

fastNLP.models.base\_model module
---------------------------------
fastNLP.models.base\_model
---------------------------


.. automodule:: fastNLP.models.base_model .. automodule:: fastNLP.models.base_model
:members: :members:
:undoc-members:
:show-inheritance:


fastNLP.models.char\_language\_model module
-------------------------------------------
fastNLP.models.char\_language\_model
-------------------------------------


.. automodule:: fastNLP.models.char_language_model .. automodule:: fastNLP.models.char_language_model
:members: :members:
:undoc-members:
:show-inheritance:


fastNLP.models.cnn\_text\_classification module
-----------------------------------------------
fastNLP.models.cnn\_text\_classification
-----------------------------------------


.. automodule:: fastNLP.models.cnn_text_classification .. automodule:: fastNLP.models.cnn_text_classification
:members: :members:
:undoc-members:
:show-inheritance:


fastNLP.models.sequence\_modeling module
----------------------------------------
fastNLP.models.sequence\_modeling
----------------------------------


.. automodule:: fastNLP.models.sequence_modeling .. automodule:: fastNLP.models.sequence_modeling
:members: :members:
:undoc-members:
:show-inheritance:



Module contents
---------------


.. automodule:: fastNLP.models .. automodule:: fastNLP.models
:members: :members:
:undoc-members:
:show-inheritance:

+ 12
- 30
docs/source/fastNLP.modules.aggregation.rst View File

@@ -1,54 +1,36 @@
fastNLP.modules.aggregation package
===================================
fastNLP.modules.aggregation
============================


Submodules
----------

fastNLP.modules.aggregation.attention module
--------------------------------------------
fastNLP.modules.aggregation.attention
--------------------------------------


.. automodule:: fastNLP.modules.aggregation.attention .. automodule:: fastNLP.modules.aggregation.attention
:members: :members:
:undoc-members:
:show-inheritance:


fastNLP.modules.aggregation.avg\_pool module
--------------------------------------------
fastNLP.modules.aggregation.avg\_pool
--------------------------------------


.. automodule:: fastNLP.modules.aggregation.avg_pool .. automodule:: fastNLP.modules.aggregation.avg_pool
:members: :members:
:undoc-members:
:show-inheritance:


fastNLP.modules.aggregation.kmax\_pool module
---------------------------------------------
fastNLP.modules.aggregation.kmax\_pool
---------------------------------------


.. automodule:: fastNLP.modules.aggregation.kmax_pool .. automodule:: fastNLP.modules.aggregation.kmax_pool
:members: :members:
:undoc-members:
:show-inheritance:


fastNLP.modules.aggregation.max\_pool module
--------------------------------------------
fastNLP.modules.aggregation.max\_pool
--------------------------------------


.. automodule:: fastNLP.modules.aggregation.max_pool .. automodule:: fastNLP.modules.aggregation.max_pool
:members: :members:
:undoc-members:
:show-inheritance:


fastNLP.modules.aggregation.self\_attention module
--------------------------------------------------
fastNLP.modules.aggregation.self\_attention
--------------------------------------------


.. automodule:: fastNLP.modules.aggregation.self_attention .. automodule:: fastNLP.modules.aggregation.self_attention
:members: :members:
:undoc-members:
:show-inheritance:



Module contents
---------------


.. automodule:: fastNLP.modules.aggregation .. automodule:: fastNLP.modules.aggregation
:members: :members:
:undoc-members:
:show-inheritance:

+ 9
- 13
docs/source/fastNLP.modules.decoder.rst View File

@@ -1,22 +1,18 @@
fastNLP.modules.decoder package
===============================
fastNLP.modules.decoder
========================


Submodules
----------

fastNLP.modules.decoder.CRF module
----------------------------------
fastNLP.modules.decoder.CRF
----------------------------


.. automodule:: fastNLP.modules.decoder.CRF .. automodule:: fastNLP.modules.decoder.CRF
:members: :members:
:undoc-members:
:show-inheritance:


fastNLP.modules.decoder.MLP
----------------------------

.. automodule:: fastNLP.modules.decoder.MLP
:members:


Module contents
---------------


.. automodule:: fastNLP.modules.decoder .. automodule:: fastNLP.modules.decoder
:members: :members:
:undoc-members:
:show-inheritance:

+ 18
- 42
docs/source/fastNLP.modules.encoder.rst View File

@@ -1,78 +1,54 @@
fastNLP.modules.encoder package
===============================
fastNLP.modules.encoder
========================


Submodules
----------

fastNLP.modules.encoder.char\_embedding module
----------------------------------------------
fastNLP.modules.encoder.char\_embedding
----------------------------------------


.. automodule:: fastNLP.modules.encoder.char_embedding .. automodule:: fastNLP.modules.encoder.char_embedding
:members: :members:
:undoc-members:
:show-inheritance:


fastNLP.modules.encoder.conv module
-----------------------------------
fastNLP.modules.encoder.conv
-----------------------------


.. automodule:: fastNLP.modules.encoder.conv .. automodule:: fastNLP.modules.encoder.conv
:members: :members:
:undoc-members:
:show-inheritance:


fastNLP.modules.encoder.conv\_maxpool module
--------------------------------------------
fastNLP.modules.encoder.conv\_maxpool
--------------------------------------


.. automodule:: fastNLP.modules.encoder.conv_maxpool .. automodule:: fastNLP.modules.encoder.conv_maxpool
:members: :members:
:undoc-members:
:show-inheritance:


fastNLP.modules.encoder.embedding module
----------------------------------------
fastNLP.modules.encoder.embedding
----------------------------------


.. automodule:: fastNLP.modules.encoder.embedding .. automodule:: fastNLP.modules.encoder.embedding
:members: :members:
:undoc-members:
:show-inheritance:


fastNLP.modules.encoder.linear module
-------------------------------------
fastNLP.modules.encoder.linear
-------------------------------


.. automodule:: fastNLP.modules.encoder.linear .. automodule:: fastNLP.modules.encoder.linear
:members: :members:
:undoc-members:
:show-inheritance:


fastNLP.modules.encoder.lstm module
-----------------------------------
fastNLP.modules.encoder.lstm
-----------------------------


.. automodule:: fastNLP.modules.encoder.lstm .. automodule:: fastNLP.modules.encoder.lstm
:members: :members:
:undoc-members:
:show-inheritance:


fastNLP.modules.encoder.masked\_rnn module
------------------------------------------
fastNLP.modules.encoder.masked\_rnn
------------------------------------


.. automodule:: fastNLP.modules.encoder.masked_rnn .. automodule:: fastNLP.modules.encoder.masked_rnn
:members: :members:
:undoc-members:
:show-inheritance:


fastNLP.modules.encoder.variational\_rnn module
-----------------------------------------------
fastNLP.modules.encoder.variational\_rnn
-----------------------------------------


.. automodule:: fastNLP.modules.encoder.variational_rnn .. automodule:: fastNLP.modules.encoder.variational_rnn
:members: :members:
:undoc-members:
:show-inheritance:



Module contents
---------------


.. automodule:: fastNLP.modules.encoder .. automodule:: fastNLP.modules.encoder
:members: :members:
:undoc-members:
:show-inheritance:

+ 2
- 7
docs/source/fastNLP.modules.interaction.rst View File

@@ -1,10 +1,5 @@
fastNLP.modules.interaction package
===================================

Module contents
---------------
fastNLP.modules.interaction
============================


.. automodule:: fastNLP.modules.interaction .. automodule:: fastNLP.modules.interaction
:members: :members:
:undoc-members:
:show-inheritance:

+ 6
- 21
docs/source/fastNLP.modules.rst View File

@@ -1,8 +1,5 @@
fastNLP.modules package
=======================

Subpackages
-----------
fastNLP.modules
================


.. toctree:: .. toctree::


@@ -11,30 +8,18 @@ Subpackages
fastNLP.modules.encoder fastNLP.modules.encoder
fastNLP.modules.interaction fastNLP.modules.interaction


Submodules
----------

fastNLP.modules.other\_modules module
-------------------------------------
fastNLP.modules.other\_modules
-------------------------------


.. automodule:: fastNLP.modules.other_modules .. automodule:: fastNLP.modules.other_modules
:members: :members:
:undoc-members:
:show-inheritance:


fastNLP.modules.utils module
----------------------------
fastNLP.modules.utils
----------------------


.. automodule:: fastNLP.modules.utils .. automodule:: fastNLP.modules.utils
:members: :members:
:undoc-members:
:show-inheritance:



Module contents
---------------


.. automodule:: fastNLP.modules .. automodule:: fastNLP.modules
:members: :members:
:undoc-members:
:show-inheritance:

+ 4
- 17
docs/source/fastNLP.rst View File

@@ -1,8 +1,5 @@
fastNLP package
===============

Subpackages
-----------
fastNLP
========


.. toctree:: .. toctree::


@@ -12,22 +9,12 @@ Subpackages
fastNLP.modules fastNLP.modules
fastNLP.saver fastNLP.saver


Submodules
----------

fastNLP.fastnlp module
----------------------
fastNLP.fastnlp
----------------


.. automodule:: fastNLP.fastnlp .. automodule:: fastNLP.fastnlp
:members: :members:
:undoc-members:
:show-inheritance:



Module contents
---------------


.. automodule:: fastNLP .. automodule:: fastNLP
:members: :members:
:undoc-members:
:show-inheritance:

+ 6
- 18
docs/source/fastNLP.saver.rst View File

@@ -1,30 +1,18 @@
fastNLP.saver package
=====================
fastNLP.saver
==============


Submodules
----------

fastNLP.saver.logger module
---------------------------
fastNLP.saver.logger
---------------------


.. automodule:: fastNLP.saver.logger .. automodule:: fastNLP.saver.logger
:members: :members:
:undoc-members:
:show-inheritance:


fastNLP.saver.model\_saver module
---------------------------------
fastNLP.saver.model\_saver
---------------------------


.. automodule:: fastNLP.saver.model_saver .. automodule:: fastNLP.saver.model_saver
:members: :members:
:undoc-members:
:show-inheritance:



Module contents
---------------


.. automodule:: fastNLP.saver .. automodule:: fastNLP.saver
:members: :members:
:undoc-members:
:show-inheritance:

BIN
docs/source/figures/procedures_and_sequence_labeling.png View File

Before After
Width: 1079  |  Height: 558  |  Size: 51 kB

BIN
docs/source/figures/text_classification.png View File

Before After
Width: 1217  |  Height: 543  |  Size: 54 kB

+ 47
- 9
docs/source/index.rst View File

@@ -1,16 +1,54 @@
.. fastNLP documentation master file, created by
sphinx-quickstart on Mon Aug 20 17:06:44 2018.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
fastNLP documentation
=====================
fastNLP,目前仍在孵化中。


Welcome to fastNLP's documentation!
===================================

Introduction
------------

fastNLP是一个基于PyTorch的模块化自然语言处理系统,用于快速开发NLP工具。
它将基于深度学习的NLP模型划分为不同的模块。
这些模块分为4类:encoder(编码),interaction(交互),aggregation(聚合)和decoder(解码),
而每个类别包含不同的实现模块。

大多数当前的NLP模型可以构建在这些模块上,这极大地简化了开发NLP模型的过程。
fastNLP的架构如下左图所示:

.. image:: figures/procedures_and_sequence_labeling.png

在constructing model部分,以序列标注(上右图)和文本分类(下图)为例进行说明:

.. image:: figures/text_classification.png

* encoder module:将输入编码为一些抽象表示,输入的是单词序列,输出向量序列。
* interaction module:使表示中的信息相互交互,输入的是向量序列,输出的也是向量序列。
* aggregation module:聚合和减少信息,输入向量序列,输出一个向量。
* decoder module:将表示解码为输出,输出一个label(文本分类)或者输出label序列(序列标注)。

其中interaction module和aggregation module在模型中不一定存在,例如上面的序列标注模型。




User's Guide
------------
.. toctree::
:maxdepth: 2

user/installation
user/quickstart


API Reference
-------------

If you are looking for information on a specific function, class or
method, this part of the documentation is for you.


.. toctree:: .. toctree::
:maxdepth: 4
:caption: Contents:
:maxdepth: 2
fastNLP
fastNLP API <fastNLP>








+ 0
- 7
docs/source/modules.rst View File

@@ -1,7 +0,0 @@
fastNLP
=======

.. toctree::
:maxdepth: 4

fastNLP

+ 31
- 0
docs/source/user/installation.rst View File

@@ -0,0 +1,31 @@
============
Installation
============

.. contents::
:local:


Cloning From GitHub
~~~~~~~~~~~~~~~~~~~

If you just want to use fastNLP, use:

.. code:: shell

git clone https://github.com/fastnlp/fastNLP
cd fastNLP

PyTorch Installation
~~~~~~~~~~~~~~~~~~~~

Visit the `PyTorch official website <https://pytorch.org/>`_ for installation
instructions based on your system. In general, you could use:

.. code:: shell

# using conda
conda install pytorch torchvision -c pytorch
# or using pip
pip3 install torch torchvision

+ 84
- 0
docs/source/user/quickstart.rst View File

@@ -0,0 +1,84 @@
==========
Quickstart
==========

Example
-------

Basic Usage
~~~~~~~~~~~

A typical fastNLP routine is composed of four phases: loading dataset,
pre-processing data, constructing model and training model.

.. code:: python

from fastNLP.models.base_model import BaseModel
from fastNLP.modules import encoder
from fastNLP.modules import aggregation
from fastNLP.modules import decoder

from fastNLP.loader.dataset_loader import ClassDatasetLoader
from fastNLP.loader.preprocess import ClassPreprocess
from fastNLP.core.trainer import ClassificationTrainer
from fastNLP.core.inference import ClassificationInfer


class ClassificationModel(BaseModel):
"""
Simple text classification model based on CNN.
"""

def __init__(self, num_classes, vocab_size):
super(ClassificationModel, self).__init__()

self.emb = encoder.Embedding(nums=vocab_size, dims=300)
self.enc = encoder.Conv(
in_channels=300, out_channels=100, kernel_size=3)
self.agg = aggregation.MaxPool()
self.dec = decoder.MLP(100, num_classes=num_classes)

def forward(self, x):
x = self.emb(x) # [N,L] -> [N,L,C]
x = self.enc(x) # [N,L,C_in] -> [N,L,C_out]
x = self.agg(x) # [N,L,C] -> [N,C]
x = self.dec(x) # [N,C] -> [N, N_class]
return x


data_dir = 'data' # directory to save data and model
train_path = 'test/data_for_tests/text_classify.txt' # training set file

# load dataset
ds_loader = ClassDatasetLoader("train", train_path)
data = ds_loader.load()

# pre-process dataset
pre = ClassPreprocess(data_dir)
vocab_size, n_classes = pre.process(data, "data_train.pkl")

# construct model
model_args = {
'num_classes': n_classes,
'vocab_size': vocab_size
}
model = ClassificationModel(num_classes=n_classes, vocab_size=vocab_size)

# train model
train_args = {
"epochs": 20,
"batch_size": 50,
"pickle_path": data_dir,
"validate": False,
"save_best_dev": False,
"model_saved_path": None,
"use_cuda": True,
"learn_rate": 1e-3,
"momentum": 0.9}
trainer = ClassificationTrainer(train_args)
trainer.train(model)

# predict using model
seqs = [x[0] for x in data]
infer = ClassificationInfer(data_dir)
labels_pred = infer.predict(model, seqs)

+ 2
- 1
fastNLP/core/preprocess.py View File

@@ -59,7 +59,6 @@ class BasePreprocess(object):


def run(self, train_dev_data, test_data=None, pickle_path="./", train_dev_split=0, cross_val=False, n_fold=10): def run(self, train_dev_data, test_data=None, pickle_path="./", train_dev_split=0, cross_val=False, n_fold=10):
"""Main preprocessing pipeline. """Main preprocessing pipeline.

:param train_dev_data: three-level list, with either single label or multiple labels in a sample. :param train_dev_data: three-level list, with either single label or multiple labels in a sample.
:param test_data: three-level list, with either single label or multiple labels in a sample. (optional) :param test_data: three-level list, with either single label or multiple labels in a sample. (optional)
:param pickle_path: str, the path to save the pickle files. :param pickle_path: str, the path to save the pickle files.
@@ -98,6 +97,8 @@ class BasePreprocess(object):
save_pickle(data_train, pickle_path, "data_train.pkl") save_pickle(data_train, pickle_path, "data_train.pkl")
else: else:
data_train = load_pickle(pickle_path, "data_train.pkl") data_train = load_pickle(pickle_path, "data_train.pkl")
if pickle_exist(pickle_path, "data_dev.pkl"):
data_dev = load_pickle(pickle_path, "data_dev.pkl")
else: else:
# cross_val is True # cross_val is True
if not pickle_exist(pickle_path, "data_train_0.pkl"): if not pickle_exist(pickle_path, "data_train_0.pkl"):


+ 1
- 1
fastNLP/modules/encoder/embedding.py View File

@@ -15,7 +15,7 @@ class Embedding(nn.Module):
def __init__(self, nums, dims, padding_idx=0, sparse=False, init_emb=None, dropout=0.0): def __init__(self, nums, dims, padding_idx=0, sparse=False, init_emb=None, dropout=0.0):
super(Embedding, self).__init__() super(Embedding, self).__init__()
self.embed = nn.Embedding(nums, dims, padding_idx, sparse=sparse) self.embed = nn.Embedding(nums, dims, padding_idx, sparse=sparse)
if init_emb:
if init_emb is not None:
self.embed.weight = nn.Parameter(init_emb) self.embed.weight = nn.Parameter(init_emb)
self.dropout = nn.Dropout(dropout) self.dropout = nn.Dropout(dropout)




Loading…
Cancel
Save