diff --git a/README.md b/README.md index a9c4874b..8ebd9d30 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,9 @@ [![Build Status](https://travis-ci.org/fastnlp/fastNLP.svg?branch=master)](https://travis-ci.org/fastnlp/fastNLP) [![codecov](https://codecov.io/gh/fastnlp/fastNLP/branch/master/graph/badge.svg)](https://codecov.io/gh/fastnlp/fastNLP) +[![PyPI version](https://badge.fury.io/py/fastNLP.svg)](https://badge.fury.io/py/fastNLP) +![Hex.pm](https://img.shields.io/hexpm/l/plug.svg) +[![Documentation Status](https://readthedocs.org/projects/fastnlp/badge/?version=latest)](http://fastnlp.readthedocs.io/?badge=latest) fastNLP is a modular Natural Language Processing system based on PyTorch, for fast development of NLP tools. It divides the NLP model based on deep learning into different modules. These modules fall into 4 categories: encoder, interaction, aggregation and decoder, while each category contains different implemented modules. Encoder modules encode the input into some abstract representation, interaction modules make the information in the representation interact with each other, aggregation modules aggregate and reduce information, and decoder modules decode the representation into the output. Most current NLP models could be built on these modules, which vastly simplifies the process of developing NLP models. The architecture of fastNLP is as the figure below: @@ -17,94 +20,9 @@ fastNLP is a modular Natural Language Processing system based on PyTorch, for fa ## Resources -- [Documentation](https://github.com/fastnlp/fastNLP) +- [Documentation](https://fastnlp.readthedocs.io/en/latest/) - [Source Code](https://github.com/fastnlp/fastNLP) - -## Example - -### Basic Usage - -A typical fastNLP routine is composed of four phases: loading dataset, pre-processing data, constructing model and training model. 
-```python -from fastNLP.core.preprocess import ClassPreprocess -from fastNLP.core.predictor import ClassificationInfer -from fastNLP.core.trainer import ClassificationTrainer -from fastNLP.loader.dataset_loader import ClassDatasetLoader -from fastNLP.models.base_model import BaseModel -from fastNLP.modules import aggregation -from fastNLP.modules import encoder -from fastNLP.modules import decoder -from fastNLP.core.loss import Loss -from fastNLP.core.optimizer import Optimizer - - -class ClassificationModel(BaseModel): - """ - Simple text classification model based on CNN. - """ - - def __init__(self, num_classes, vocab_size): - super(ClassificationModel, self).__init__() - - self.emb = encoder.Embedding(nums=vocab_size, dims=300) - self.enc = encoder.Conv( - in_channels=300, out_channels=100, kernel_size=3) - self.agg = aggregation.MaxPool() - self.dec = decoder.MLP(size_layer=[100, num_classes]) - - def forward(self, x): - x = self.emb(x) # [N,L] -> [N,L,C] - x = self.enc(x) # [N,L,C_in] -> [N,L,C_out] - x = self.agg(x) # [N,L,C] -> [N,C] - x = self.dec(x) # [N,C] -> [N, N_class] - return x - - -data_dir = 'save/' # directory to save data and model -train_path = './data_for_tests/text_classify.txt' # training set file - -# load dataset -ds_loader = ClassDatasetLoader(train_path) -data = ds_loader.load() - -# pre-process dataset -pre = ClassPreprocess() -train_set, dev_set = pre.run(data, train_dev_split=0.3, pickle_path=data_dir) -n_classes, vocab_size = pre.num_classes, pre.vocab_size - -# construct model -model_args = { - 'num_classes': n_classes, - 'vocab_size': vocab_size -} -model = ClassificationModel(num_classes=n_classes, vocab_size=vocab_size) - -# construct trainer -train_args = { - "epochs": 3, - "batch_size": 16, - "pickle_path": data_dir, - "validate": False, - "save_best_dev": False, - "model_saved_path": None, - "use_cuda": True, - "loss": Loss("cross_entropy"), - "optimizer": Optimizer("Adam", lr=0.001) -} -trainer = 
ClassificationTrainer(**train_args) - -# start training -trainer.train(model, train_data=train_set, dev_data=dev_set) - -# predict using model -data_infer = [x[0] for x in data] -infer = ClassificationInfer(data_dir) -labels_pred = infer.predict(model.cpu(), data_infer) -print(labels_pred) -``` - - ## Installation Run the following commands to install fastNLP package. ```shell diff --git a/docs/requirements.txt b/docs/requirements.txt index 3749c2cd..2809876b 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,3 +1,4 @@ -sphinx --e git://github.com/snide/sphinx_rtd_theme.git#egg=sphinx_rtd_theme -sphinxcontrib.katex \ No newline at end of file +numpy>=1.14.2 +http://download.pytorch.org/whl/cpu/torch-0.4.1-cp35-cp35m-linux_x86_64.whl +torchvision>=0.1.8 +sphinx-rtd-theme==0.4.1 \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index d4d73d2a..ff3639fa 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -42,6 +42,8 @@ release = '1.0' extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.viewcode', + 'sphinx.ext.autosummary', + ] # Add any paths that contain templates here, relative to this directory. diff --git a/docs/source/fastNLP.core.rst b/docs/source/fastNLP.core.rst index 880be59f..13943f72 100644 --- a/docs/source/fastNLP.core.rst +++ b/docs/source/fastNLP.core.rst @@ -1,62 +1,54 @@ -fastNLP.core package -==================== +fastNLP.core +============= -Submodules ----------- - -fastNLP.core.action module --------------------------- +fastNLP.core.action +-------------------- .. automodule:: fastNLP.core.action :members: - :undoc-members: - :show-inheritance: -fastNLP.core.metrics module ---------------------------- +fastNLP.core.loss +------------------ + +.. automodule:: fastNLP.core.loss + :members: + +fastNLP.core.metrics +--------------------- .. 
automodule:: fastNLP.core.metrics :members: - :undoc-members: - :show-inheritance: -fastNLP.core.optimizer module ------------------------------ +fastNLP.core.optimizer +----------------------- .. automodule:: fastNLP.core.optimizer :members: - :undoc-members: - :show-inheritance: -fastNLP.core.predictor module ------------------------------ +fastNLP.core.predictor +----------------------- .. automodule:: fastNLP.core.predictor :members: - :undoc-members: - :show-inheritance: -fastNLP.core.tester module --------------------------- +fastNLP.core.preprocess +------------------------ + +.. automodule:: fastNLP.core.preprocess + :members: + +fastNLP.core.tester +-------------------- .. automodule:: fastNLP.core.tester :members: - :undoc-members: - :show-inheritance: -fastNLP.core.trainer module ---------------------------- +fastNLP.core.trainer +--------------------- .. automodule:: fastNLP.core.trainer :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- .. automodule:: fastNLP.core :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/fastNLP.loader.rst b/docs/source/fastNLP.loader.rst index 90123b5b..658e07ff 100644 --- a/docs/source/fastNLP.loader.rst +++ b/docs/source/fastNLP.loader.rst @@ -1,62 +1,36 @@ -fastNLP.loader package -====================== +fastNLP.loader +=============== -Submodules ----------- - -fastNLP.loader.base\_loader module ----------------------------------- +fastNLP.loader.base\_loader +---------------------------- .. automodule:: fastNLP.loader.base_loader :members: - :undoc-members: - :show-inheritance: -fastNLP.loader.config\_loader module ------------------------------------- +fastNLP.loader.config\_loader +------------------------------ .. automodule:: fastNLP.loader.config_loader :members: - :undoc-members: - :show-inheritance: -fastNLP.loader.dataset\_loader module -------------------------------------- +fastNLP.loader.dataset\_loader +------------------------------- .. 
automodule:: fastNLP.loader.dataset_loader :members: - :undoc-members: - :show-inheritance: -fastNLP.loader.embed\_loader module ------------------------------------ +fastNLP.loader.embed\_loader +----------------------------- .. automodule:: fastNLP.loader.embed_loader :members: - :undoc-members: - :show-inheritance: -fastNLP.loader.model\_loader module ------------------------------------ +fastNLP.loader.model\_loader +----------------------------- .. automodule:: fastNLP.loader.model_loader :members: - :undoc-members: - :show-inheritance: - -fastNLP.loader.preprocess module --------------------------------- - -.. automodule:: fastNLP.loader.preprocess - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- .. automodule:: fastNLP.loader :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/fastNLP.models.rst b/docs/source/fastNLP.models.rst index 49481ac1..f17b1d49 100644 --- a/docs/source/fastNLP.models.rst +++ b/docs/source/fastNLP.models.rst @@ -1,46 +1,30 @@ -fastNLP.models package -====================== +fastNLP.models +=============== -Submodules ----------- - -fastNLP.models.base\_model module ---------------------------------- +fastNLP.models.base\_model +--------------------------- .. automodule:: fastNLP.models.base_model :members: - :undoc-members: - :show-inheritance: -fastNLP.models.char\_language\_model module -------------------------------------------- +fastNLP.models.char\_language\_model +------------------------------------- .. automodule:: fastNLP.models.char_language_model :members: - :undoc-members: - :show-inheritance: -fastNLP.models.cnn\_text\_classification module ------------------------------------------------ +fastNLP.models.cnn\_text\_classification +----------------------------------------- .. 
automodule:: fastNLP.models.cnn_text_classification :members: - :undoc-members: - :show-inheritance: -fastNLP.models.sequence\_modeling module ----------------------------------------- +fastNLP.models.sequence\_modeling +---------------------------------- .. automodule:: fastNLP.models.sequence_modeling :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- .. automodule:: fastNLP.models :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/fastNLP.modules.aggregation.rst b/docs/source/fastNLP.modules.aggregation.rst index 7106f7bc..bfaf8646 100644 --- a/docs/source/fastNLP.modules.aggregation.rst +++ b/docs/source/fastNLP.modules.aggregation.rst @@ -1,54 +1,36 @@ -fastNLP.modules.aggregation package -=================================== +fastNLP.modules.aggregation +============================ -Submodules ----------- - -fastNLP.modules.aggregation.attention module --------------------------------------------- +fastNLP.modules.aggregation.attention +-------------------------------------- .. automodule:: fastNLP.modules.aggregation.attention :members: - :undoc-members: - :show-inheritance: -fastNLP.modules.aggregation.avg\_pool module --------------------------------------------- +fastNLP.modules.aggregation.avg\_pool +-------------------------------------- .. automodule:: fastNLP.modules.aggregation.avg_pool :members: - :undoc-members: - :show-inheritance: -fastNLP.modules.aggregation.kmax\_pool module ---------------------------------------------- +fastNLP.modules.aggregation.kmax\_pool +--------------------------------------- .. automodule:: fastNLP.modules.aggregation.kmax_pool :members: - :undoc-members: - :show-inheritance: -fastNLP.modules.aggregation.max\_pool module --------------------------------------------- +fastNLP.modules.aggregation.max\_pool +-------------------------------------- .. 
automodule:: fastNLP.modules.aggregation.max_pool :members: - :undoc-members: - :show-inheritance: -fastNLP.modules.aggregation.self\_attention module --------------------------------------------------- +fastNLP.modules.aggregation.self\_attention +-------------------------------------------- .. automodule:: fastNLP.modules.aggregation.self_attention :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- .. automodule:: fastNLP.modules.aggregation :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/fastNLP.modules.decoder.rst b/docs/source/fastNLP.modules.decoder.rst index 914802da..6844543a 100644 --- a/docs/source/fastNLP.modules.decoder.rst +++ b/docs/source/fastNLP.modules.decoder.rst @@ -1,22 +1,18 @@ -fastNLP.modules.decoder package -=============================== +fastNLP.modules.decoder +======================== -Submodules ----------- - -fastNLP.modules.decoder.CRF module ----------------------------------- +fastNLP.modules.decoder.CRF +---------------------------- .. automodule:: fastNLP.modules.decoder.CRF :members: - :undoc-members: - :show-inheritance: +fastNLP.modules.decoder.MLP +---------------------------- + +.. automodule:: fastNLP.modules.decoder.MLP + :members: -Module contents ---------------- .. automodule:: fastNLP.modules.decoder :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/fastNLP.modules.encoder.rst b/docs/source/fastNLP.modules.encoder.rst index 3af14b64..41b4ce13 100644 --- a/docs/source/fastNLP.modules.encoder.rst +++ b/docs/source/fastNLP.modules.encoder.rst @@ -1,78 +1,54 @@ -fastNLP.modules.encoder package -=============================== +fastNLP.modules.encoder +======================== -Submodules ----------- - -fastNLP.modules.encoder.char\_embedding module ----------------------------------------------- +fastNLP.modules.encoder.char\_embedding +---------------------------------------- .. 
automodule:: fastNLP.modules.encoder.char_embedding :members: - :undoc-members: - :show-inheritance: -fastNLP.modules.encoder.conv module ------------------------------------ +fastNLP.modules.encoder.conv +----------------------------- .. automodule:: fastNLP.modules.encoder.conv :members: - :undoc-members: - :show-inheritance: -fastNLP.modules.encoder.conv\_maxpool module --------------------------------------------- +fastNLP.modules.encoder.conv\_maxpool +-------------------------------------- .. automodule:: fastNLP.modules.encoder.conv_maxpool :members: - :undoc-members: - :show-inheritance: -fastNLP.modules.encoder.embedding module ----------------------------------------- +fastNLP.modules.encoder.embedding +---------------------------------- .. automodule:: fastNLP.modules.encoder.embedding :members: - :undoc-members: - :show-inheritance: -fastNLP.modules.encoder.linear module -------------------------------------- +fastNLP.modules.encoder.linear +------------------------------- .. automodule:: fastNLP.modules.encoder.linear :members: - :undoc-members: - :show-inheritance: -fastNLP.modules.encoder.lstm module ------------------------------------ +fastNLP.modules.encoder.lstm +----------------------------- .. automodule:: fastNLP.modules.encoder.lstm :members: - :undoc-members: - :show-inheritance: -fastNLP.modules.encoder.masked\_rnn module ------------------------------------------- +fastNLP.modules.encoder.masked\_rnn +------------------------------------ .. automodule:: fastNLP.modules.encoder.masked_rnn :members: - :undoc-members: - :show-inheritance: -fastNLP.modules.encoder.variational\_rnn module ------------------------------------------------ +fastNLP.modules.encoder.variational\_rnn +----------------------------------------- .. automodule:: fastNLP.modules.encoder.variational_rnn :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- .. 
automodule:: fastNLP.modules.encoder :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/fastNLP.modules.interaction.rst b/docs/source/fastNLP.modules.interaction.rst index 32552231..91a34268 100644 --- a/docs/source/fastNLP.modules.interaction.rst +++ b/docs/source/fastNLP.modules.interaction.rst @@ -1,10 +1,5 @@ -fastNLP.modules.interaction package -=================================== - -Module contents ---------------- +fastNLP.modules.interaction +============================ .. automodule:: fastNLP.modules.interaction :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/fastNLP.modules.rst b/docs/source/fastNLP.modules.rst index 5a6cac28..6ccdc21a 100644 --- a/docs/source/fastNLP.modules.rst +++ b/docs/source/fastNLP.modules.rst @@ -1,8 +1,5 @@ -fastNLP.modules package -======================= - -Subpackages ------------ +fastNLP.modules +================ .. toctree:: @@ -11,30 +8,18 @@ Subpackages fastNLP.modules.encoder fastNLP.modules.interaction -Submodules ----------- - -fastNLP.modules.other\_modules module -------------------------------------- +fastNLP.modules.other\_modules +------------------------------- .. automodule:: fastNLP.modules.other_modules :members: - :undoc-members: - :show-inheritance: -fastNLP.modules.utils module ----------------------------- +fastNLP.modules.utils +---------------------- .. automodule:: fastNLP.modules.utils :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- .. automodule:: fastNLP.modules :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/fastNLP.rst b/docs/source/fastNLP.rst index fbc3a922..bb5037ce 100644 --- a/docs/source/fastNLP.rst +++ b/docs/source/fastNLP.rst @@ -1,8 +1,5 @@ -fastNLP package -=============== - -Subpackages ------------ +fastNLP +======== .. 
toctree:: @@ -12,22 +9,12 @@ Subpackages fastNLP.modules fastNLP.saver -Submodules ----------- - -fastNLP.fastnlp module ----------------------- +fastNLP.fastnlp +---------------- .. automodule:: fastNLP.fastnlp :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- .. automodule:: fastNLP :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/fastNLP.saver.rst b/docs/source/fastNLP.saver.rst index 7699c2e8..daa6fbe8 100644 --- a/docs/source/fastNLP.saver.rst +++ b/docs/source/fastNLP.saver.rst @@ -1,30 +1,18 @@ -fastNLP.saver package -===================== +fastNLP.saver +============== -Submodules ----------- - -fastNLP.saver.logger module ---------------------------- +fastNLP.saver.logger +--------------------- .. automodule:: fastNLP.saver.logger :members: - :undoc-members: - :show-inheritance: -fastNLP.saver.model\_saver module ---------------------------------- +fastNLP.saver.model\_saver +--------------------------- .. automodule:: fastNLP.saver.model_saver :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- .. automodule:: fastNLP.saver :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/figures/procedures_and_sequence_labeling.png b/docs/source/figures/procedures_and_sequence_labeling.png new file mode 100644 index 00000000..06adc051 Binary files /dev/null and b/docs/source/figures/procedures_and_sequence_labeling.png differ diff --git a/docs/source/figures/text_classification.png b/docs/source/figures/text_classification.png new file mode 100644 index 00000000..5884c64e Binary files /dev/null and b/docs/source/figures/text_classification.png differ diff --git a/docs/source/index.rst b/docs/source/index.rst index 1caf2373..37798321 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,16 +1,54 @@ -.. fastNLP documentation master file, created by - sphinx-quickstart on Mon Aug 20 17:06:44 2018. 
- You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. +fastNLP documentation +===================== +fastNLP,目前仍在孵化中。 -Welcome to fastNLP's documentation! -=================================== + +Introduction +------------ + +fastNLP是一个基于PyTorch的模块化自然语言处理系统,用于快速开发NLP工具。 +它将基于深度学习的NLP模型划分为不同的模块。 +这些模块分为4类:encoder(编码),interaction(交互), aggregation(聚合) and decoder(解码), +而每个类别包含不同的实现模块。 + +大多数当前的NLP模型可以构建在这些模块上,这极大地简化了开发NLP模型的过程。 +fastNLP的架构如下左图所示: + +.. image:: figures/procedures_and_sequence_labeling.png + +在constructing model部分,以序列标注(上右图)和文本分类(下图)为例进行说明: + +.. image:: figures/text_classification.png + +* encoder module:将输入编码为一些抽象表示,输入的是单词序列,输出向量序列。 +* interaction module:使表示中的信息相互交互,输入的是向量序列,输出的也是向量序列。 +* aggregation module:聚合和减少信息,输入向量序列,输出一个向量。 +* decoder module:将表示解码为输出,输出一个label(文本分类)或者输出label序列(序列标注) + +其中interaction module和aggregation module在模型中不一定存在,例如上面的序列标注模型。 + + + + +User's Guide +------------ +.. toctree:: + :maxdepth: 2 + + user/installation + user/quickstart + + +API Reference +------------- + +If you are looking for information on a specific function, class or +method, this part of the documentation is for you. .. toctree:: - :maxdepth: 4 - :caption: Contents: + :maxdepth: 2 - fastNLP + fastNLP API diff --git a/docs/source/modules.rst b/docs/source/modules.rst deleted file mode 100644 index e9a92cb7..00000000 --- a/docs/source/modules.rst +++ /dev/null @@ -1,7 +0,0 @@ -fastNLP -======= - -.. toctree:: - :maxdepth: 4 - - fastNLP diff --git a/docs/source/user/installation.rst b/docs/source/user/installation.rst new file mode 100644 index 00000000..0655041b --- /dev/null +++ b/docs/source/user/installation.rst @@ -0,0 +1,31 @@ +============ +Installation +============ + +.. contents:: + :local: + + +Cloning From GitHub +~~~~~~~~~~~~~~~~~~~ + +If you just want to use fastNLP, use: + +.. 
code:: shell + + git clone https://github.com/fastnlp/fastNLP + cd fastNLP + + +PyTorch Installation +~~~~~~~~~~~~~~~~~~~~ + +Visit the `PyTorch official website <https://pytorch.org/>`_ for installation instructions based +on your system. In general, you could use: + +.. code:: shell + + # using conda + conda install pytorch torchvision -c pytorch + # or using pip + pip3 install torch torchvision diff --git a/docs/source/user/quickstart.rst b/docs/source/user/quickstart.rst new file mode 100644 index 00000000..c8340053 --- /dev/null +++ b/docs/source/user/quickstart.rst @@ -0,0 +1,84 @@ +========== +Quickstart +========== + +Example +------- + +Basic Usage +~~~~~~~~~~~ + +A typical fastNLP routine is composed of four phases: loading dataset, +pre-processing data, constructing model and training model. + +.. code:: python + + from fastNLP.models.base_model import BaseModel + from fastNLP.modules import encoder + from fastNLP.modules import aggregation + from fastNLP.modules import decoder + + from fastNLP.loader.dataset_loader import ClassDatasetLoader + from fastNLP.loader.preprocess import ClassPreprocess + from fastNLP.core.trainer import ClassificationTrainer + from fastNLP.core.inference import ClassificationInfer + + + class ClassificationModel(BaseModel): + """ + Simple text classification model based on CNN. 
+ """ + + def __init__(self, num_classes, vocab_size): + super(ClassificationModel, self).__init__() + + self.emb = encoder.Embedding(nums=vocab_size, dims=300) + self.enc = encoder.Conv( + in_channels=300, out_channels=100, kernel_size=3) + self.agg = aggregation.MaxPool() + self.dec = decoder.MLP(100, num_classes=num_classes) + + def forward(self, x): + x = self.emb(x) # [N,L] -> [N,L,C] + x = self.enc(x) # [N,L,C_in] -> [N,L,C_out] + x = self.agg(x) # [N,L,C] -> [N,C] + x = self.dec(x) # [N,C] -> [N, N_class] + return x + + + data_dir = 'data' # directory to save data and model + train_path = 'test/data_for_tests/text_classify.txt' # training set file + + # load dataset + ds_loader = ClassDatasetLoader("train", train_path) + data = ds_loader.load() + + # pre-process dataset + pre = ClassPreprocess(data_dir) + vocab_size, n_classes = pre.process(data, "data_train.pkl") + + # construct model + model_args = { + 'num_classes': n_classes, + 'vocab_size': vocab_size + } + model = ClassificationModel(num_classes=n_classes, vocab_size=vocab_size) + + # train model + train_args = { + "epochs": 20, + "batch_size": 50, + "pickle_path": data_dir, + "validate": False, + "save_best_dev": False, + "model_saved_path": None, + "use_cuda": True, + "learn_rate": 1e-3, + "momentum": 0.9} + trainer = ClassificationTrainer(train_args) + trainer.train(model) + + # predict using model + seqs = [x[0] for x in data] + infer = ClassificationInfer(data_dir) + labels_pred = infer.predict(model, seqs) \ No newline at end of file diff --git a/fastNLP/core/preprocess.py b/fastNLP/core/preprocess.py index 1c419ce9..5a95e39b 100644 --- a/fastNLP/core/preprocess.py +++ b/fastNLP/core/preprocess.py @@ -59,7 +59,6 @@ class BasePreprocess(object): def run(self, train_dev_data, test_data=None, pickle_path="./", train_dev_split=0, cross_val=False, n_fold=10): """Main preprocessing pipeline. - :param train_dev_data: three-level list, with either single label or multiple labels in a sample. 
:param test_data: three-level list, with either single label or multiple labels in a sample. (optional) :param pickle_path: str, the path to save the pickle files. @@ -98,6 +97,8 @@ class BasePreprocess(object): save_pickle(data_train, pickle_path, "data_train.pkl") else: data_train = load_pickle(pickle_path, "data_train.pkl") + if pickle_exist(pickle_path, "data_dev.pkl"): + data_dev = load_pickle(pickle_path, "data_dev.pkl") else: # cross_val is True if not pickle_exist(pickle_path, "data_train_0.pkl"): diff --git a/fastNLP/modules/encoder/embedding.py b/fastNLP/modules/encoder/embedding.py index b2641bff..73ddd77a 100644 --- a/fastNLP/modules/encoder/embedding.py +++ b/fastNLP/modules/encoder/embedding.py @@ -15,7 +15,7 @@ class Embedding(nn.Module): def __init__(self, nums, dims, padding_idx=0, sparse=False, init_emb=None, dropout=0.0): super(Embedding, self).__init__() self.embed = nn.Embedding(nums, dims, padding_idx, sparse=sparse) - if init_emb: + if init_emb is not None: self.embed.weight = nn.Parameter(init_emb) self.dropout = nn.Dropout(dropout)