@@ -2,6 +2,6 @@ fastNLP.core.batch | |||||
================== | ================== | ||||
.. automodule:: fastNLP.core.batch | .. automodule:: fastNLP.core.batch | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -2,6 +2,6 @@ fastNLP.core.callback | |||||
===================== | ===================== | ||||
.. automodule:: fastNLP.core.callback | .. automodule:: fastNLP.core.callback | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -2,6 +2,6 @@ fastNLP.core.const | |||||
================== | ================== | ||||
.. automodule:: fastNLP.core.const | .. automodule:: fastNLP.core.const | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -2,6 +2,6 @@ fastNLP.core.dataset | |||||
==================== | ==================== | ||||
.. automodule:: fastNLP.core.dataset | .. automodule:: fastNLP.core.dataset | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -2,6 +2,6 @@ fastNLP.core.field | |||||
================== | ================== | ||||
.. automodule:: fastNLP.core.field | .. automodule:: fastNLP.core.field | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -2,6 +2,6 @@ fastNLP.core.instance | |||||
===================== | ===================== | ||||
.. automodule:: fastNLP.core.instance | .. automodule:: fastNLP.core.instance | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -2,6 +2,6 @@ fastNLP.core.losses | |||||
=================== | =================== | ||||
.. automodule:: fastNLP.core.losses | .. automodule:: fastNLP.core.losses | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -2,6 +2,6 @@ fastNLP.core.metrics | |||||
==================== | ==================== | ||||
.. automodule:: fastNLP.core.metrics | .. automodule:: fastNLP.core.metrics | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -2,6 +2,6 @@ fastNLP.core.optimizer | |||||
====================== | ====================== | ||||
.. automodule:: fastNLP.core.optimizer | .. automodule:: fastNLP.core.optimizer | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -2,15 +2,15 @@ fastNLP.core | |||||
============ | ============ | ||||
.. automodule:: fastNLP.core | .. automodule:: fastNLP.core | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
子模块 | 子模块 | ||||
---------- | ---------- | ||||
.. toctree:: | .. toctree:: | ||||
:titlesonly: | |||||
:maxdepth: 1 | |||||
fastNLP.core.batch | fastNLP.core.batch | ||||
fastNLP.core.callback | fastNLP.core.callback | ||||
@@ -26,4 +26,3 @@ fastNLP.core | |||||
fastNLP.core.trainer | fastNLP.core.trainer | ||||
fastNLP.core.utils | fastNLP.core.utils | ||||
fastNLP.core.vocabulary | fastNLP.core.vocabulary | ||||
@@ -2,6 +2,6 @@ fastNLP.core.sampler | |||||
==================== | ==================== | ||||
.. automodule:: fastNLP.core.sampler | .. automodule:: fastNLP.core.sampler | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -2,6 +2,6 @@ fastNLP.core.tester | |||||
=================== | =================== | ||||
.. automodule:: fastNLP.core.tester | .. automodule:: fastNLP.core.tester | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -2,6 +2,6 @@ fastNLP.core.trainer | |||||
==================== | ==================== | ||||
.. automodule:: fastNLP.core.trainer | .. automodule:: fastNLP.core.trainer | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -2,6 +2,6 @@ fastNLP.core.utils | |||||
================== | ================== | ||||
.. automodule:: fastNLP.core.utils | .. automodule:: fastNLP.core.utils | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -2,6 +2,6 @@ fastNLP.core.vocabulary | |||||
======================= | ======================= | ||||
.. automodule:: fastNLP.core.vocabulary | .. automodule:: fastNLP.core.vocabulary | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -0,0 +1,7 @@ | |||||
fastNLP.embeddings.bert\_embedding | |||||
================================== | |||||
.. automodule:: fastNLP.embeddings.bert_embedding | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -0,0 +1,7 @@ | |||||
fastNLP.embeddings.char\_embedding | |||||
================================== | |||||
.. automodule:: fastNLP.embeddings.char_embedding | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -0,0 +1,7 @@ | |||||
fastNLP.embeddings.contextual\_embedding | |||||
======================================== | |||||
.. automodule:: fastNLP.embeddings.contextual_embedding | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -0,0 +1,7 @@ | |||||
fastNLP.embeddings.elmo\_embedding | |||||
================================== | |||||
.. automodule:: fastNLP.embeddings.elmo_embedding | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -0,0 +1,7 @@ | |||||
fastNLP.embeddings.embedding | |||||
============================ | |||||
.. automodule:: fastNLP.embeddings.embedding | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -0,0 +1,22 @@ | |||||
fastNLP.embeddings | |||||
================== | |||||
.. automodule:: fastNLP.embeddings | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
子模块 | |||||
---------- | |||||
.. toctree:: | |||||
:maxdepth: 1 | |||||
fastNLP.embeddings.bert_embedding | |||||
fastNLP.embeddings.char_embedding | |||||
fastNLP.embeddings.contextual_embedding | |||||
fastNLP.embeddings.elmo_embedding | |||||
fastNLP.embeddings.embedding | |||||
fastNLP.embeddings.stack_embedding | |||||
fastNLP.embeddings.static_embedding | |||||
fastNLP.embeddings.utils |
@@ -0,0 +1,7 @@ | |||||
fastNLP.embeddings.stack\_embedding | |||||
=================================== | |||||
.. automodule:: fastNLP.embeddings.stack_embedding | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -0,0 +1,7 @@ | |||||
fastNLP.embeddings.static\_embedding | |||||
==================================== | |||||
.. automodule:: fastNLP.embeddings.static_embedding | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -0,0 +1,7 @@ | |||||
fastNLP.embeddings.utils | |||||
======================== | |||||
.. automodule:: fastNLP.embeddings.utils | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -2,6 +2,6 @@ fastNLP.io.base\_loader | |||||
======================= | ======================= | ||||
.. automodule:: fastNLP.io.base_loader | .. automodule:: fastNLP.io.base_loader | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -2,6 +2,6 @@ fastNLP.io.data\_loader | |||||
========================== | ========================== | ||||
.. automodule:: fastNLP.io.data_loader | .. automodule:: fastNLP.io.data_loader | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -2,6 +2,6 @@ fastNLP.io.dataset\_loader | |||||
========================== | ========================== | ||||
.. automodule:: fastNLP.io.dataset_loader | .. automodule:: fastNLP.io.dataset_loader | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -2,6 +2,6 @@ fastNLP.io.embed\_loader | |||||
======================== | ======================== | ||||
.. automodule:: fastNLP.io.embed_loader | .. automodule:: fastNLP.io.embed_loader | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -2,6 +2,6 @@ fastNLP.io.model\_io | |||||
==================== | ==================== | ||||
.. automodule:: fastNLP.io.model_io | .. automodule:: fastNLP.io.model_io | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -2,19 +2,18 @@ fastNLP.io | |||||
========== | ========== | ||||
.. automodule:: fastNLP.io | .. automodule:: fastNLP.io | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
子模块 | 子模块 | ||||
---------- | ---------- | ||||
.. toctree:: | .. toctree:: | ||||
:titlesonly: | |||||
:maxdepth: 1 | |||||
fastNLP.io.data_loader | |||||
fastNLP.io.base_loader | fastNLP.io.base_loader | ||||
fastNLP.io.dataset_loader | |||||
fastNLP.io.embed_loader | fastNLP.io.embed_loader | ||||
fastNLP.io.dataset_loader | |||||
fastNLP.io.data_loader | |||||
fastNLP.io.model_io | fastNLP.io.model_io | ||||
@@ -2,6 +2,6 @@ fastNLP.models.biaffine\_parser | |||||
=============================== | =============================== | ||||
.. automodule:: fastNLP.models.biaffine_parser | .. automodule:: fastNLP.models.biaffine_parser | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -2,6 +2,6 @@ fastNLP.models.cnn\_text\_classification | |||||
======================================== | ======================================== | ||||
.. automodule:: fastNLP.models.cnn_text_classification | .. automodule:: fastNLP.models.cnn_text_classification | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -2,19 +2,18 @@ fastNLP.models | |||||
============== | ============== | ||||
.. automodule:: fastNLP.models | .. automodule:: fastNLP.models | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
子模块 | 子模块 | ||||
---------- | ---------- | ||||
.. toctree:: | .. toctree:: | ||||
:titlesonly: | |||||
:maxdepth: 1 | |||||
fastNLP.models.biaffine_parser | fastNLP.models.biaffine_parser | ||||
fastNLP.models.cnn_text_classification | fastNLP.models.cnn_text_classification | ||||
fastNLP.models.sequence_labeling | fastNLP.models.sequence_labeling | ||||
fastNLP.models.snli | fastNLP.models.snli | ||||
fastNLP.models.star_transformer | fastNLP.models.star_transformer | ||||
@@ -2,6 +2,6 @@ fastNLP.models.sequence\_labeling | |||||
================================= | ================================= | ||||
.. automodule:: fastNLP.models.sequence_labeling | .. automodule:: fastNLP.models.sequence_labeling | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -2,6 +2,6 @@ fastNLP.models.snli | |||||
=================== | =================== | ||||
.. automodule:: fastNLP.models.snli | .. automodule:: fastNLP.models.snli | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -2,6 +2,6 @@ fastNLP.models.star\_transformer | |||||
================================ | ================================ | ||||
.. automodule:: fastNLP.models.star_transformer | .. automodule:: fastNLP.models.star_transformer | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -1,7 +0,0 @@ | |||||
fastNLP.modules.decoder.CRF | |||||
=========================== | |||||
.. automodule:: fastNLP.modules.decoder.crf | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -1,7 +0,0 @@ | |||||
fastNLP.modules.decoder.MLP | |||||
=========================== | |||||
.. automodule:: fastNLP.modules.decoder.mlp | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -2,17 +2,7 @@ fastNLP.modules.decoder | |||||
======================= | ======================= | ||||
.. automodule:: fastNLP.modules.decoder | .. automodule:: fastNLP.modules.decoder | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
子模块 | |||||
---------- | |||||
.. toctree:: | |||||
:titlesonly: | |||||
fastNLP.modules.decoder.crf | |||||
fastNLP.modules.decoder.mlp | |||||
fastNLP.modules.decoder.utils | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
@@ -1,7 +0,0 @@ | |||||
fastNLP.modules.decoder.utils | |||||
============================= | |||||
.. automodule:: fastNLP.modules.decoder.utils | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -1,7 +0,0 @@ | |||||
fastNLP.modules.encoder.bert | |||||
============================ | |||||
.. automodule:: fastNLP.modules.encoder.bert | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -1,7 +0,0 @@ | |||||
fastNLP.modules.encoder.char\_encoder | |||||
===================================== | |||||
.. automodule:: fastNLP.modules.encoder.char_encoder | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -1,7 +0,0 @@ | |||||
fastNLP.modules.encoder.conv\_maxpool | |||||
===================================== | |||||
.. automodule:: fastNLP.modules.encoder.conv_maxpool | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -1,7 +0,0 @@ | |||||
fastNLP.modules.encoder.embedding | |||||
================================= | |||||
.. automodule:: fastNLP.modules.encoder.embedding | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -1,7 +0,0 @@ | |||||
fastNLP.modules.encoder.lstm | |||||
============================ | |||||
.. automodule:: fastNLP.modules.encoder.lstm | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -2,22 +2,6 @@ fastNLP.modules.encoder | |||||
======================= | ======================= | ||||
.. automodule:: fastNLP.modules.encoder | .. automodule:: fastNLP.modules.encoder | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
子模块 | |||||
---------- | |||||
.. toctree:: | |||||
:titlesonly: | |||||
fastNLP.modules.encoder.bert | |||||
fastNLP.modules.encoder.char_encoder | |||||
fastNLP.modules.encoder.conv_maxpool | |||||
fastNLP.modules.encoder.embedding | |||||
fastNLP.modules.encoder.lstm | |||||
fastNLP.modules.encoder.star_transformer | |||||
fastNLP.modules.encoder.transformer | |||||
fastNLP.modules.encoder.variational_rnn | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -1,7 +0,0 @@ | |||||
fastNLP.modules.encoder.star\_transformer | |||||
========================================= | |||||
.. automodule:: fastNLP.modules.encoder.star_transformer | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -1,7 +0,0 @@ | |||||
fastNLP.modules.encoder.transformer | |||||
=================================== | |||||
.. automodule:: fastNLP.modules.encoder.transformer | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -1,7 +0,0 @@ | |||||
fastNLP.modules.encoder.variational\_rnn | |||||
======================================== | |||||
.. automodule:: fastNLP.modules.encoder.variational_rnn | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -2,15 +2,16 @@ fastNLP.modules | |||||
=============== | =============== | ||||
.. automodule:: fastNLP.modules | .. automodule:: fastNLP.modules | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
子模块 | 子模块 | ||||
----------- | ----------- | ||||
.. toctree:: | .. toctree:: | ||||
:titlesonly: | |||||
:titlesonly: | |||||
:maxdepth: 1 | |||||
fastNLP.modules.decoder | |||||
fastNLP.modules.encoder | |||||
fastNLP.modules.decoder | |||||
fastNLP.modules.encoder |
@@ -2,19 +2,18 @@ API 文档 | |||||
=============== | =============== | ||||
.. automodule:: fastNLP | .. automodule:: fastNLP | ||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
内部模块 | 内部模块 | ||||
----------- | ----------- | ||||
.. toctree:: | .. toctree:: | ||||
:titlesonly: | |||||
:maxdepth: 3 | |||||
fastNLP.core | |||||
fastNLP.io | |||||
fastNLP.modules | |||||
fastNLP.models | |||||
:maxdepth: 1 | |||||
fastNLP.core | |||||
fastNLP.embeddings | |||||
fastNLP.io | |||||
fastNLP.models | |||||
fastNLP.modules |
@@ -1,60 +1,28 @@ | |||||
fastNLP 中文文档 | fastNLP 中文文档 | ||||
===================== | ===================== | ||||
fastNLP 是一款轻量级的 NLP 处理套件。你既可以使用它快速地完成一个命名实体识别(NER)、中文分词或文本分类任务; | |||||
也可以使用他构建许多复杂的网络模型,进行科研。它具有如下的特性: | |||||
`fastNLP <https://github.com/fastnlp/fastNLP/>`_ 是一款轻量级的 NLP 处理套件。你既可以使用它快速地完成一个序列标注 | |||||
(NER、POS-Tagging等)、中文分词、文本分类、Matching、指代消解、摘要等任务 | |||||
(详见 `reproduction <https://github.com/fastnlp/fastNLP/tree/master/reproduction>`_ ); | |||||
也可以使用它构建许多复杂的网络模型,进行科研。它具有如下的特性: | |||||
- 统一的Tabular式数据容器,让数据预处理过程简洁明了。内置多种数据集的DataSet Loader,省去预处理代码。 | |||||
- 各种方便的NLP工具,例如预处理embedding加载; 中间数据cache等; | |||||
- 详尽的中文文档以供查阅; | |||||
- 提供诸多高级模块,例如Variational LSTM, Transformer, CRF等; | |||||
- 封装CNNText,Biaffine等模型可供直接使用; | |||||
- 便捷且具有扩展性的训练器; 提供多种内置callback函数,方便实验记录、异常捕获等。 | |||||
- 统一的Tabular式数据容器,让数据预处理过程简洁明了。内置多种数据集的 :mod:`~fastNLP.io.data_loader` ,省去预处理代码; | |||||
- 多种训练、测试组件,例如训练器 :class:`~fastNLP.Trainer` ;测试器 :class:`~fastNLP.Tester` ;以及各种评测 :mod:`~fastNLP.core.metrics` 等等; | |||||
- 各种方便的NLP工具,例如预处理 :mod:`embedding<fastNLP.embeddings>` 加载(包括ELMo和BERT); 中间数据存储 :func:`cache <fastNLP.cache_results>` 等; | |||||
- 提供诸多高级模块 :mod:`~fastNLP.modules`,例如 :class:`~fastNLP.modules.VarLSTM` , :class:`Transformer<fastNLP.modules.TransformerEncoder>` , :class:`CRF<fastNLP.modules.ConditionalRandomField>` 等; | |||||
- 在序列标注、中文分词、文本分类、Matching、指代消解、摘要等任务上封装了各种 :mod:`~fastNLP.models` 可供直接使用; | |||||
- 训练器便捷且具有扩展性,提供多种内置 :mod:`~fastNLP.core.callback` 函数,方便实验记录、异常捕获等。 | |||||
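The feature list above names the Trainer, Tester, metric and callback components. The sketch below (an editorial illustration, not part of the diffed files) shows one way these pieces could fit together in fastNLP 0.4.x; the toy sentences, labels, field names and hyper-parameters are assumptions made for this example.

.. code-block:: python

    # Minimal, hypothetical end-to-end sketch for fastNLP 0.4.x.
    # The toy data and hyper-parameters are invented for illustration.
    from fastNLP import DataSet, Vocabulary, Trainer, CrossEntropyLoss, AccuracyMetric
    from fastNLP.models import CNNText

    # Tabular DataSet built directly from raw text and integer labels.
    data = DataSet({"raw_words": ["I like this movie", "boring and too long"],
                    "target": [1, 0]})
    data.apply(lambda ins: ins["raw_words"].split(), new_field_name="words")

    # Build a vocabulary, index the text and mark input/target fields.
    vocab = Vocabulary()
    vocab.from_dataset(data, field_name="words")
    vocab.index_dataset(data, field_name="words")
    data.set_input("words")
    data.set_target("target")

    # Train a built-in model; metrics drive the evaluation on dev_data.
    model = CNNText((len(vocab), 50), num_classes=2, dropout=0.1)
    trainer = Trainer(train_data=data, model=model,
                      loss=CrossEntropyLoss(), metrics=AccuracyMetric(),
                      dev_data=data, n_epochs=2)
    trainer.train()

Callbacks from :mod:`~fastNLP.core.callback` would be attached through the Trainer's ``callbacks`` argument.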
内置组件 | |||||
------------ | |||||
大部分用于的 NLP 任务神经网络都可以看做由编码(encoder)、聚合(aggregator)、解码(decoder)三种模块组成。 | |||||
.. image:: figures/text_classification.png | |||||
fastNLP 在 :mod:`~fastNLP.modules` 模块中内置了三种模块的诸多组件,可以帮助用户快速搭建自己所需的网络。 | |||||
三种模块的功能和常见组件如下: | |||||
+-----------------------+-----------------------+-----------------------+ | |||||
| module type | functionality | example | | |||||
+=======================+=======================+=======================+ | |||||
| encoder | 将输入编码为具有具 | embedding, RNN, CNN, | | |||||
| | 有表示能力的向量 | transformer | | |||||
+-----------------------+-----------------------+-----------------------+ | |||||
| aggregator | 从多个向量中聚合信息 | self-attention, | | |||||
| | | max-pooling | | |||||
+-----------------------+-----------------------+-----------------------+ | |||||
| decoder | 将具有某种表示意义的 | MLP, CRF | | |||||
| | 向量解码为需要的输出 | | | |||||
| | 形式 | | | |||||
+-----------------------+-----------------------+-----------------------+ | |||||
内置模型 | |||||
---------------- | |||||
fastNLP 在 :mod:`~fastNLP.models` 模块中内置了如 :class:`~fastNLP.models.CNNText` 、 | |||||
:class:`~fastNLP.models.SeqLabeling` 等完整的模型,以供用户直接使用。 | |||||
.. todo:: | |||||
这些模型的介绍如下表所示:(模型名称 + 介绍 + 任务上的结果) | |||||
用户手册 | 用户手册 | ||||
---------------- | ---------------- | ||||
.. toctree:: | .. toctree:: | ||||
:maxdepth: 1 | |||||
:maxdepth: 2 | |||||
安装指南 </user/installation> | 安装指南 </user/installation> | ||||
快速入门 </user/quickstart> | 快速入门 </user/quickstart> | ||||
详细指南 </user/tutorials> | |||||
详细教程 </user/tutorials> | |||||
API 文档 | API 文档 | ||||
------------- | ------------- | ||||
@@ -67,11 +35,11 @@ API 文档 | |||||
fastNLP | fastNLP | ||||
fitlog | |||||
------ | |||||
fitlog文档 | |||||
---------- | |||||
用户可以 `点此 <https://fitlog.readthedocs.io/zh/latest/>`_ 查看fitlog的文档。 | |||||
fitlog 是由我们团队开发,用于帮助用户记录日志并管理代码的工具 | |||||
您可以 `点此 <https://fitlog.readthedocs.io/zh/latest/>`_ 查看fitlog的文档。 | |||||
fitlog 是由我们团队开发的日志记录+代码管理的工具。 | |||||
索引与搜索 | 索引与搜索 | ||||
================== | ================== | ||||
@@ -60,7 +60,7 @@ | |||||
seq_len=3) | seq_len=3) | ||||
]) | ]) | ||||
在初步构建完数据集之后,我们可可以通过 `for` 循环遍历 :class:`~fastNLP.DataSet` 中的内容。 | |||||
在初步构建完数据集之后,我们可以通过 `for` 循环遍历 :class:`~fastNLP.DataSet` 中的内容。 | |||||
.. code-block:: python | .. code-block:: python | ||||
@@ -35,12 +35,12 @@ Part II: 数据集的使用方式 | |||||
- _load 函数:从一个数据文件中读取数据到一个 :class:`~fastNLP.DataSet` | - _load 函数:从一个数据文件中读取数据到一个 :class:`~fastNLP.DataSet` | ||||
- load 函数(可以使用基类的方法):从一个或多个数据文件中读取数据到一个或多个 :class:`~fastNLP.DataSet` | - load 函数(可以使用基类的方法):从一个或多个数据文件中读取数据到一个或多个 :class:`~fastNLP.DataSet` | ||||
- process 函数:一个或多个从数据文件中读取数据,并处理成可以训练的 :class:`~fastNLP.io.DataInfo` | |||||
- process 函数:一个或多个从数据文件中读取数据,并处理成可以训练的 :class:`~fastNLP.io.DataBundle` | |||||
**\*process函数中可以调用load函数或_load函数** | **\*process函数中可以调用load函数或_load函数** | ||||
DataSetLoader的_load或者load函数返回的 :class:`~fastNLP.DataSet` 当中,内容为数据集的文本信息,process函数返回的 | DataSetLoader的_load或者load函数返回的 :class:`~fastNLP.DataSet` 当中,内容为数据集的文本信息,process函数返回的 | ||||
:class:`~fastNLP.io.DataInfo` 当中, `datasets` 的内容为已经index好的、可以直接被 :class:`~fastNLP.Trainer` | |||||
:class:`~fastNLP.io.DataBundle` 当中, `datasets` 的内容为已经index好的、可以直接被 :class:`~fastNLP.Trainer` | |||||
接受的内容。 | 接受的内容。 | ||||
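As a hedged illustration of the ``_load`` / ``load`` / ``process`` contract described above, here is a hypothetical loader for a two-column ``sentence<TAB>label`` file; the file format, field names and the exact DataBundle constructor arguments are assumptions for this sketch, not something stated in the diff.

.. code-block:: python

    # Hypothetical DataSetLoader following the _load / load / process convention above.
    from fastNLP import DataSet, Instance, Vocabulary
    from fastNLP.io import DataSetLoader, DataBundle


    class TSVClassificationLoader(DataSetLoader):
        def _load(self, path):
            # Read one file into a DataSet that still holds raw text.
            ds = DataSet()
            with open(path, encoding="utf-8") as f:
                for line in f:
                    sent, label = line.rstrip("\n").split("\t")
                    ds.append(Instance(raw_words=sent, target=int(label)))
            return ds

        def process(self, paths):
            # Turn {"train": path, ...} into an indexed, Trainer-ready DataBundle.
            datasets = {name: self._load(p) for name, p in paths.items()}
            for ds in datasets.values():
                ds.apply(lambda ins: ins["raw_words"].split(), new_field_name="words")
            vocab = Vocabulary()
            vocab.from_dataset(*datasets.values(), field_name="words")
            for ds in datasets.values():
                vocab.index_dataset(ds, field_name="words")
                ds.set_input("words")
                ds.set_target("target")
            return DataBundle(vocabs={"words": vocab}, datasets=datasets)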
-------------------------------------------------------- | -------------------------------------------------------- | ||||
@@ -45,7 +45,7 @@ fastNLP可以方便地载入各种类型的数据。同时,针对常见的数 | |||||
数据处理 | 数据处理 | ||||
---------------------------- | ---------------------------- | ||||
我们进一步处理数据。将数据和词表封装在 :class:`~fastNLP.DataInfo` 类中。data是DataInfo的实例。 | |||||
我们进一步处理数据。将数据和词表封装在 :class:`~fastNLP.DataBundle` 类中。data是DataBundle的实例。 | |||||
我们输入模型的数据包括char embedding,以及word embedding。在数据处理部分,我们尝试完成词表的构建。 | 我们输入模型的数据包括char embedding,以及word embedding。在数据处理部分,我们尝试完成词表的构建。 | ||||
使用fastNLP中的Vocabulary类来构建词表。 | 使用fastNLP中的Vocabulary类来构建词表。 | ||||
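A small sketch of the vocabulary-building step mentioned above; the toy data and the ``words`` / ``chars`` field names are assumptions for illustration.

.. code-block:: python

    # Build word- and char-level vocabularies with fastNLP's Vocabulary class.
    from fastNLP import DataSet, Vocabulary

    data = DataSet({"words": [["fast", "NLP"], ["hello", "world"]]})
    # Derive a char field from the word field (one character list per word).
    data.apply(lambda ins: [list(w) for w in ins["words"]], new_field_name="chars")

    word_vocab = Vocabulary()
    word_vocab.from_dataset(data, field_name="words")
    word_vocab.index_dataset(data, field_name="words")    # words -> word ids in place

    char_vocab = Vocabulary()
    char_vocab.from_dataset(data, field_name="chars")
    char_vocab.index_dataset(data, field_name="chars")    # chars -> char ids in place

    print(len(word_vocab), word_vocab.to_index("fast"))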
@@ -181,7 +181,7 @@ FastNLP 完全支持使用 pyTorch 编写的模型,但与 pyTorch 中编写模 | |||||
) | ) | ||||
) | ) | ||||
FastNLP 中包含的各种模块如下表,您可以点击具体的名称查看详细的 API: | |||||
FastNLP 中包含的各种模块如下表,您可以点击具体的名称查看详细的 API,也可以通过 :doc:`/fastNLP.modules` 进行了解。 | |||||
.. csv-table:: | .. csv-table:: | ||||
:header: 名称, 介绍 | :header: 名称, 介绍 | ||||
@@ -189,7 +189,6 @@ FastNLP 中包含的各种模块如下表,您可以点击具体的名称查看 | |||||
:class:`~fastNLP.modules.ConvolutionCharEncoder` , char级别的卷积 encoder | :class:`~fastNLP.modules.ConvolutionCharEncoder` , char级别的卷积 encoder | ||||
:class:`~fastNLP.modules.LSTMCharEncoder` , char级别基于LSTM的 encoder | :class:`~fastNLP.modules.LSTMCharEncoder` , char级别基于LSTM的 encoder | ||||
:class:`~fastNLP.modules.ConvMaxpool` , 结合了Convolution和Max-Pooling于一体的模块 | :class:`~fastNLP.modules.ConvMaxpool` , 结合了Convolution和Max-Pooling于一体的模块 | ||||
:class:`~fastNLP.modules.Embedding` , 基础的Embedding模块 | |||||
:class:`~fastNLP.modules.LSTM` , LSTM模块, 轻量封装了PyTorch的LSTM | :class:`~fastNLP.modules.LSTM` , LSTM模块, 轻量封装了PyTorch的LSTM | ||||
:class:`~fastNLP.modules.StarTransformer` , Star-Transformer 的encoder部分 | :class:`~fastNLP.modules.StarTransformer` , Star-Transformer 的encoder部分 | ||||
:class:`~fastNLP.modules.TransformerEncoder` , Transformer的encoder模块,不包含embedding层 | :class:`~fastNLP.modules.TransformerEncoder` , Transformer的encoder模块,不包含embedding层 | ||||
@@ -198,8 +197,11 @@ FastNLP 中包含的各种模块如下表,您可以点击具体的名称查看 | |||||
:class:`~fastNLP.modules.VarGRU` , Variational Dropout GRU 模块 | :class:`~fastNLP.modules.VarGRU` , Variational Dropout GRU 模块 | ||||
:class:`~fastNLP.modules.MaxPool` , Max-pooling模块 | :class:`~fastNLP.modules.MaxPool` , Max-pooling模块 | ||||
:class:`~fastNLP.modules.MaxPoolWithMask` , 带mask矩阵的max pooling。在做 max-pooling的时候不会考虑mask值为0的位置。 | :class:`~fastNLP.modules.MaxPoolWithMask` , 带mask矩阵的max pooling。在做 max-pooling的时候不会考虑mask值为0的位置。 | ||||
:class:`~fastNLP.modules.AvgPool` , Average-pooling模块 | |||||
:class:`~fastNLP.modules.AvgPoolWithMask` , 带mask矩阵的average pooling。在做 average-pooling的时候不会考虑mask值为0的位置。 | |||||
:class:`~fastNLP.modules.MultiHeadAttention` , MultiHead Attention 模块 | :class:`~fastNLP.modules.MultiHeadAttention` , MultiHead Attention 模块 | ||||
:class:`~fastNLP.modules.MLP` , 简单的多层感知器模块 | :class:`~fastNLP.modules.MLP` , 简单的多层感知器模块 | ||||
:class:`~fastNLP.modules.ConditionalRandomField` , 条件随机场模块 | :class:`~fastNLP.modules.ConditionalRandomField` , 条件随机场模块 | ||||
:class:`~fastNLP.modules.viterbi_decode` , 给定一个特征矩阵以及转移分数矩阵,计算出最佳的路径以及对应的分数 (与 :class:`~fastNLP.modules.ConditionalRandomField` 配合使用) | :class:`~fastNLP.modules.viterbi_decode` , 给定一个特征矩阵以及转移分数矩阵,计算出最佳的路径以及对应的分数 (与 :class:`~fastNLP.modules.ConditionalRandomField` 配合使用) | ||||
:class:`~fastNLP.modules.allowed_transitions` , 给定一个id到label的映射表,返回所有可以跳转的列表(与 :class:`~fastNLP.modules.ConditionalRandomField` 配合使用) | :class:`~fastNLP.modules.allowed_transitions` , 给定一个id到label的映射表,返回所有可以跳转的列表(与 :class:`~fastNLP.modules.ConditionalRandomField` 配合使用) | ||||
:class:`~fastNLP.modules.TimestepDropout` , 简单包装过的Dropout 组件 |
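To make the table above concrete, here is a tiny composition of two of the listed modules; the tensor sizes are arbitrary toy values and the snippet is only a sketch, not an official recipe.

.. code-block:: python

    # Compose modules from the table above: a BiLSTM encoder feeding an MLP decoder.
    import torch
    from fastNLP.modules import LSTM, MLP

    x = torch.randn(4, 7, 50)                  # [batch, seq_len, input_size], toy values
    lstm = LSTM(input_size=50, hidden_size=32, bidirectional=True, batch_first=True)
    mlp = MLP([64, 32, 5])                     # 64 = 2 * hidden_size of the BiLSTM

    out, _ = lstm(x)                           # [4, 7, 64]
    logits = mlp(out[:, -1, :])                # last timestep -> [4, 5] class scores
    print(logits.shape)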
@@ -44,10 +44,10 @@ Callback的构建和使用 | |||||
这里,:class:`~fastNLP.Callback` 中所有以 ``on_`` 开头的类方法会在 :class:`~fastNLP.Trainer` 的训练中在特定时间调用。 | 这里,:class:`~fastNLP.Callback` 中所有以 ``on_`` 开头的类方法会在 :class:`~fastNLP.Trainer` 的训练中在特定时间调用。 | ||||
如 on_train_begin() 会在训练开始时被调用,on_epoch_end() 会在每个 epoch 结束时调用。 | 如 on_train_begin() 会在训练开始时被调用,on_epoch_end() 会在每个 epoch 结束时调用。 | ||||
具体有哪些类方法,参见文档。 | |||||
具体有哪些类方法,参见文档 :class:`~fastNLP.Callback` 。 | |||||
另外,为了使用方便,可以在 :class:`~fastNLP.Callback` 内部访问 :class:`~fastNLP.Trainer` 中的属性,如 optimizer, epoch, step,分别对应训练时的优化器,当前epoch数,和当前的总step数。 | 另外,为了使用方便,可以在 :class:`~fastNLP.Callback` 内部访问 :class:`~fastNLP.Trainer` 中的属性,如 optimizer, epoch, step,分别对应训练时的优化器,当前epoch数,和当前的总step数。 | ||||
具体可访问的属性,参见文档。 | |||||
具体可访问的属性,参见文档 :class:`~fastNLP.Callback` 。 | |||||
使用Callback | 使用Callback | ||||
在定义好 :class:`~fastNLP.Callback` 之后,就能将它传入Trainer的 ``callbacks`` 参数,在实际训练时使用。 | 在定义好 :class:`~fastNLP.Callback` 之后,就能将它传入Trainer的 ``callbacks`` 参数,在实际训练时使用。 | ||||
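A minimal sketch of such a Callback, assuming the ``epoch`` / ``step`` / ``n_epochs`` attributes described above; the logging behaviour itself is invented for illustration.

.. code-block:: python

    # Hypothetical Callback that logs progress via the Trainer attributes noted above.
    from fastNLP import Callback


    class EpochLogger(Callback):
        def on_train_begin(self):
            print("training starts, planned epochs:", self.n_epochs)

        def on_epoch_end(self):
            print("finished epoch {}, global step {}".format(self.epoch, self.step))

    # It would then be registered as: Trainer(..., callbacks=[EpochLogger()])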
@@ -1,18 +1,20 @@ | |||||
=================== | |||||
fastNLP详细使用教程 | |||||
=================== | |||||
======================== | |||||
fastNLP 详细使用教程 | |||||
======================== | |||||
这里是更详细的使用教程。对于大部分的用户,我们建议你从第一篇开始顺序阅读;如果你只想了解其中的一部分,也可以进行选读。 | |||||
.. toctree:: | .. toctree:: | ||||
:maxdepth: 1 | :maxdepth: 1 | ||||
1. 使用DataSet预处理文本 </tutorials/tutorial_1_data_preprocess> | |||||
2. 使用DataSetLoader加载数据集 </tutorials/tutorial_2_load_dataset> | |||||
3. 使用Embedding模块将文本转成向量 </tutorials/tutorial_3_embedding> | |||||
4. 动手实现一个文本分类器I-使用Trainer和Tester快速训练和测试 </tutorials/tutorial_4_loss_optimizer> | |||||
5. 动手实现一个文本分类器II-使用DataSetIter实现自定义训练过程 </tutorials/tutorial_5_datasetiter> | |||||
6. 快速实现序列标注模型 </tutorials/tutorial_6_seq_labeling> | |||||
7. 使用Modules和Models快速搭建自定义模型 </tutorials/tutorial_7_modules_models> | |||||
8. 使用Metric快速评测你的模型 </tutorials/tutorial_8_metrics> | |||||
9. 使用Callback自定义你的训练过程 </tutorials/tutorial_9_callback> | |||||
10. 使用fitlog 辅助 fastNLP 进行科研 </tutorials/tutorial_10_fitlog> | |||||
使用DataSet预处理文本 </tutorials/tutorial_1_data_preprocess> | |||||
使用DataSetLoader加载数据集 </tutorials/tutorial_2_load_dataset> | |||||
使用Embedding模块将文本转成向量 </tutorials/tutorial_3_embedding> | |||||
动手实现一个文本分类器I-使用Trainer和Tester快速训练和测试 </tutorials/tutorial_4_loss_optimizer> | |||||
动手实现一个文本分类器II-使用DataSetIter实现自定义训练过程 </tutorials/tutorial_5_datasetiter> | |||||
快速实现序列标注模型 </tutorials/tutorial_6_seq_labeling> | |||||
使用Modules和Models快速搭建自定义模型 </tutorials/tutorial_7_modules_models> | |||||
使用Metric快速评测你的模型 </tutorials/tutorial_8_metrics> | |||||
使用Callback自定义你的训练过程 </tutorials/tutorial_9_callback> | |||||
使用fitlog 辅助 fastNLP 进行科研 </tutorials/tutorial_10_fitlog> | |||||
@@ -1,11 +1,12 @@ | |||||
""" | """ | ||||
fastNLP 由 :mod:`~fastNLP.core` 、 :mod:`~fastNLP.io` 、:mod:`~fastNLP.modules`、:mod:`~fastNLP.models` | |||||
等子模块组成,你可以点进去查看每个模块的文档。 | |||||
fastNLP 由 :mod:`~fastNLP.core` 、 :mod:`~fastNLP.io` 、:mod:`~fastNLP.embeddings` 、 :mod:`~fastNLP.modules`、 | |||||
:mod:`~fastNLP.models` 等子模块组成,你可以查看每个模块的文档。 | |||||
- :mod:`~fastNLP.core` 是fastNLP 的核心模块,包括 DataSet、 Trainer、 Tester 等组件。详见文档 :doc:`/fastNLP.core` | - :mod:`~fastNLP.core` 是fastNLP 的核心模块,包括 DataSet、 Trainer、 Tester 等组件。详见文档 :doc:`/fastNLP.core` | ||||
- :mod:`~fastNLP.io` 是实现输入输出的模块,包括了数据集的读取,模型的存取等功能。详见文档 :doc:`/fastNLP.io` | - :mod:`~fastNLP.io` 是实现输入输出的模块,包括了数据集的读取,模型的存取等功能。详见文档 :doc:`/fastNLP.io` | ||||
- :mod:`~fastNLP.embeddings` 提供用于构建复杂网络模型所需的各种embedding。详见文档 :doc:`/fastNLP.embeddings` | |||||
- :mod:`~fastNLP.modules` 包含了用于搭建神经网络模型的诸多组件,可以帮助用户快速搭建自己所需的网络。详见文档 :doc:`/fastNLP.modules` | - :mod:`~fastNLP.modules` 包含了用于搭建神经网络模型的诸多组件,可以帮助用户快速搭建自己所需的网络。详见文档 :doc:`/fastNLP.modules` | ||||
- :mod:`~fastNLP.models` 包含了一些使用 fastNLP 实现的完整网络模型,包括 :class:`~fastNLP.models.CNNText` 、 :class:`~fastNLP.models.SeqLabeling` 等常见模型。详见文档 :doc:`/fastNLP.models` | |||||
- :mod:`~fastNLP.models` 包含了一些使用 fastNLP 实现的完整网络模型,包括 :class:`~fastNLP.models.CNNText` 、 :class:`~fastNLP.models.SeqLabeling` 等常见模型。详见文档 :doc:`fastNLP.models` | |||||
fastNLP 中最常用的组件可以直接从 fastNLP 包中 import ,他们的文档如下: | fastNLP 中最常用的组件可以直接从 fastNLP 包中 import ,他们的文档如下: | ||||
""" | """ | ||||
@@ -61,4 +62,5 @@ __version__ = '0.4.5' | |||||
from .core import * | from .core import * | ||||
from . import models | from . import models | ||||
from . import modules | from . import modules | ||||
from . import embeddings | |||||
from .io import data_loader | from .io import data_loader |
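As the docstring above states, the most commonly used components can be imported straight from the top-level package; a short sketch (the ``StaticEmbedding`` name is assumed from the embeddings sub-package added in this diff):

.. code-block:: python

    # Common top-level imports; models and building blocks live in sub-packages.
    from fastNLP import DataSet, Instance, Vocabulary, Trainer, Tester
    from fastNLP import CrossEntropyLoss, AccuracyMetric
    from fastNLP.models import CNNText               # complete, ready-to-use models
    from fastNLP.modules import LSTM, MLP            # network building blocks
    from fastNLP.embeddings import StaticEmbedding   # assumed class name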
@@ -3,36 +3,40 @@ | |||||
1. 用于读入 embedding 的 :doc:`EmbedLoader <fastNLP.io.embed_loader>` 类, | 1. 用于读入 embedding 的 :doc:`EmbedLoader <fastNLP.io.embed_loader>` 类, | ||||
2. 用于读入数据的 :doc:`DataSetLoader <fastNLP.io.dataset_loader>` 类 | |||||
2. 用于读入不同格式数据的 :doc:`DataSetLoader <fastNLP.io.dataset_loader>` 类 | |||||
3. 用于保存和载入模型的类, 参考 :doc:`/fastNLP.io.model_io` | |||||
3. 用于读入不同数据集并进行预处理的 :doc:`DataLoader <fastNLP.io.data_loader>` 类 | |||||
4. 用于保存和载入模型的类, 参考 :doc:`model_io文档</fastNLP.io.model_io>` | |||||
这些类的使用方法如下: | 这些类的使用方法如下: | ||||
""" | """ | ||||
__all__ = [ | __all__ = [ | ||||
'EmbedLoader', | 'EmbedLoader', | ||||
'DataBundle', | |||||
'DataSetLoader', | |||||
'CSVLoader', | 'CSVLoader', | ||||
'JsonLoader', | 'JsonLoader', | ||||
'ModelLoader', | |||||
'ModelSaver', | |||||
'DataBundle', | |||||
'DataSetLoader', | |||||
'ConllLoader', | 'ConllLoader', | ||||
'Conll2003Loader', | 'Conll2003Loader', | ||||
'IMDBLoader', | 'IMDBLoader', | ||||
'MatchingLoader', | 'MatchingLoader', | ||||
'PeopleDailyCorpusLoader', | |||||
'SNLILoader', | 'SNLILoader', | ||||
'SSTLoader', | |||||
'SST2Loader', | |||||
'MNLILoader', | 'MNLILoader', | ||||
'MTL16Loader', | |||||
'PeopleDailyCorpusLoader', | |||||
'QNLILoader', | 'QNLILoader', | ||||
'QuoraLoader', | 'QuoraLoader', | ||||
'RTELoader', | 'RTELoader', | ||||
'SSTLoader', | |||||
'SST2Loader', | |||||
'YelpLoader', | |||||
'ModelLoader', | |||||
'ModelSaver', | |||||
] | ] | ||||
from .embed_loader import EmbedLoader | from .embed_loader import EmbedLoader | ||||
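For illustration, a sketch of the fastNLP.io entry points listed above; the file paths are placeholders and the call signatures reflect the 0.4.x API as far as known.

.. code-block:: python

    # Sketch of fastNLP.io usage; paths are placeholders, signatures as far as known.
    from fastNLP.io import CSVLoader, EmbedLoader, ModelSaver, ModelLoader

    # 1. read a headerless tab-separated file into a DataSet
    loader = CSVLoader(headers=["raw_words", "target"], sep="\t")
    # dataset = loader.load("train.tsv")                    # assumed path

    # 2. load pre-trained vectors aligned with an existing Vocabulary
    # embedding = EmbedLoader.load_with_vocab("glove.txt", vocab)

    # 3. save / restore a trained PyTorch model
    # ModelSaver("model.pkl").save_pytorch(model)
    # ModelLoader.load_pytorch(model, "model.pkl")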
@@ -1,13 +1,14 @@ | |||||
""" | """ | ||||
用于读数据集的模块, 可以读取文本分类、序列标注、Matching任务的数据集 | 用于读数据集的模块, 可以读取文本分类、序列标注、Matching任务的数据集 | ||||
这些模块的使用方法如下: | |||||
这些模块的具体介绍如下,您可以通过阅读 :doc:`教程</tutorials/tutorial_2_load_dataset>` 来进行了解。 | |||||
""" | """ | ||||
__all__ = [ | __all__ = [ | ||||
'ConllLoader', | 'ConllLoader', | ||||
'Conll2003Loader', | 'Conll2003Loader', | ||||
'IMDBLoader', | 'IMDBLoader', | ||||
'MatchingLoader', | 'MatchingLoader', | ||||
'SNLILoader', | |||||
'MNLILoader', | 'MNLILoader', | ||||
'MTL16Loader', | 'MTL16Loader', | ||||
'PeopleDailyCorpusLoader', | 'PeopleDailyCorpusLoader', | ||||
@@ -16,7 +17,6 @@ __all__ = [ | |||||
'RTELoader', | 'RTELoader', | ||||
'SSTLoader', | 'SSTLoader', | ||||
'SST2Loader', | 'SST2Loader', | ||||
'SNLILoader', | |||||
'YelpLoader', | 'YelpLoader', | ||||
] | ] | ||||
@@ -58,7 +58,7 @@ class ConllLoader(DataSetLoader): | |||||
class Conll2003Loader(ConllLoader): | class Conll2003Loader(ConllLoader): | ||||
""" | """ | ||||
别名::class:`fastNLP.io.Conll2003Loader` :class:`fastNLP.io.dataset_loader.Conll2003Loader` | |||||
别名::class:`fastNLP.io.Conll2003Loader` :class:`fastNLP.io.data_loader.Conll2003Loader` | |||||
读取Conll2003数据 | 读取Conll2003数据 | ||||
@@ -7,7 +7,7 @@ from ...core.const import Const | |||||
class PeopleDailyCorpusLoader(DataSetLoader): | class PeopleDailyCorpusLoader(DataSetLoader): | ||||
""" | """ | ||||
别名::class:`fastNLP.io.PeopleDailyCorpusLoader` :class:`fastNLP.io.dataset_loader.PeopleDailyCorpusLoader` | |||||
别名::class:`fastNLP.io.PeopleDailyCorpusLoader` :class:`fastNLP.io.data_loader.PeopleDailyCorpusLoader` | |||||
读取人民日报数据集 | 读取人民日报数据集 | ||||
""" | """ | ||||
@@ -130,6 +130,8 @@ def _find_cycle(vertices, edges): | |||||
class GraphParser(BaseModel): | class GraphParser(BaseModel): | ||||
""" | """ | ||||
别名::class:`fastNLP.models.GraphParser` :class:`fastNLP.models.biaffine_parser.GraphParser` | |||||
别名::class:`fastNLP.models.GraphParser` :class:`fastNLP.models.biaffine_parser.GraphParser` | |||||
基于图的parser base class, 支持贪婪解码和最大生成树解码 | 基于图的parser base class, 支持贪婪解码和最大生成树解码 | ||||
""" | """ | ||||
@@ -25,14 +25,14 @@ class CNNText(torch.nn.Module): | |||||
:param int,tuple(int) kernel_sizes: 输出channel的kernel大小。 | :param int,tuple(int) kernel_sizes: 输出channel的kernel大小。 | ||||
:param float dropout: Dropout的大小 | :param float dropout: Dropout的大小 | ||||
""" | """ | ||||
def __init__(self, init_embed, | def __init__(self, init_embed, | ||||
num_classes, | num_classes, | ||||
kernel_nums=(30, 40, 50), | kernel_nums=(30, 40, 50), | ||||
kernel_sizes=(1, 3, 5), | kernel_sizes=(1, 3, 5), | ||||
dropout=0.5): | dropout=0.5): | ||||
super(CNNText, self).__init__() | super(CNNText, self).__init__() | ||||
# no support for pre-trained embedding currently | # no support for pre-trained embedding currently | ||||
self.embed = embedding.Embedding(init_embed) | self.embed = embedding.Embedding(init_embed) | ||||
self.conv_pool = encoder.ConvMaxpool( | self.conv_pool = encoder.ConvMaxpool( | ||||
@@ -41,7 +41,7 @@ class CNNText(torch.nn.Module): | |||||
kernel_sizes=kernel_sizes) | kernel_sizes=kernel_sizes) | ||||
self.dropout = nn.Dropout(dropout) | self.dropout = nn.Dropout(dropout) | ||||
self.fc = nn.Linear(sum(kernel_nums), num_classes) | self.fc = nn.Linear(sum(kernel_nums), num_classes) | ||||
def forward(self, words, seq_len=None): | def forward(self, words, seq_len=None): | ||||
""" | """ | ||||
@@ -58,7 +58,7 @@ class CNNText(torch.nn.Module): | |||||
x = self.dropout(x) | x = self.dropout(x) | ||||
x = self.fc(x) # [N,C] -> [N, N_class] | x = self.fc(x) # [N,C] -> [N, N_class] | ||||
return {C.OUTPUT: x} | return {C.OUTPUT: x} | ||||
def predict(self, words, seq_len=None): | def predict(self, words, seq_len=None): | ||||
""" | """ | ||||
:param torch.LongTensor words: [batch_size, seq_len],句子中word的index | :param torch.LongTensor words: [batch_size, seq_len],句子中word的index | ||||
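A toy forward/predict call matching the signatures shown above; the vocabulary size, class count and dummy tensors are assumptions for this sketch.

.. code-block:: python

    # Direct use of CNNText with dummy tensors; all sizes are made up for illustration.
    import torch
    from fastNLP.models import CNNText
    from fastNLP.core.const import Const as C

    model = CNNText(init_embed=(1000, 50), num_classes=3, dropout=0.1)
    words = torch.randint(0, 1000, (4, 12))     # [batch_size, seq_len] word indices
    seq_len = torch.tensor([12, 10, 7, 12])

    out = model(words, seq_len)                 # forward(): {'pred': [4, 3] class scores}
    pred = model.predict(words, seq_len)        # predict(): argmax class per sample
    print(out[C.OUTPUT].shape, pred[C.OUTPUT].shape)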
@@ -1,10 +1,10 @@ | |||||
""" | """ | ||||
本模块实现了两种序列标注模型 | |||||
本模块实现了几种序列标注模型 | |||||
""" | """ | ||||
__all__ = [ | __all__ = [ | ||||
"SeqLabeling", | "SeqLabeling", | ||||
"AdvSeqLabel", | "AdvSeqLabel", | ||||
"BiLSTMCRF" | |||||
# "BiLSTMCRF" | |||||
] | ] | ||||
import torch | import torch | ||||
@@ -25,7 +25,10 @@ from ..modules import ConditionalRandomField | |||||
class BiLSTMCRF(BaseModel): | class BiLSTMCRF(BaseModel): | ||||
""" | """ | ||||
结构为BiLSTM + FC + Dropout + CRF. | 结构为BiLSTM + FC + Dropout + CRF. | ||||
TODO 补充文档 | |||||
.. todo:: | |||||
继续补充文档 | |||||
:param embed: tuple: | :param embed: tuple: | ||||
:param num_classes: | :param num_classes: | ||||
:param num_layers: | :param num_layers: | ||||
@@ -15,7 +15,10 @@ from ..core.utils import seq_len_to_mask | |||||
class ESIM(BaseModel): | class ESIM(BaseModel): | ||||
"""ESIM model的一个PyTorch实现 | |||||
""" | |||||
别名::class:`fastNLP.models.ESIM` :class:`fastNLP.models.snli.ESIM` | |||||
ESIM model的一个PyTorch实现 | |||||
论文参见: https://arxiv.org/pdf/1609.06038.pdf | 论文参见: https://arxiv.org/pdf/1609.06038.pdf | ||||
:param fastNLP.TokenEmbedding init_embedding: 初始化的TokenEmbedding | :param fastNLP.TokenEmbedding init_embedding: 初始化的TokenEmbedding | ||||
@@ -34,7 +34,7 @@ class StarTransEnc(nn.Module): | |||||
:param emb_dropout: 词嵌入的dropout概率. | :param emb_dropout: 词嵌入的dropout概率. | ||||
:param dropout: 模型除词嵌入外的dropout概率. | :param dropout: 模型除词嵌入外的dropout概率. | ||||
""" | """ | ||||
def __init__(self, init_embed, | def __init__(self, init_embed, | ||||
hidden_size, | hidden_size, | ||||
num_layers, | num_layers, | ||||
@@ -54,7 +54,7 @@ class StarTransEnc(nn.Module): | |||||
head_dim=head_dim, | head_dim=head_dim, | ||||
dropout=dropout, | dropout=dropout, | ||||
max_len=max_len) | max_len=max_len) | ||||
def forward(self, x, mask): | def forward(self, x, mask): | ||||
""" | """ | ||||
:param FloatTensor x: [batch, length, hidden] 输入的序列 | :param FloatTensor x: [batch, length, hidden] 输入的序列 | ||||
@@ -79,7 +79,7 @@ class _Cls(nn.Module): | |||||
nn.Dropout(dropout), | nn.Dropout(dropout), | ||||
nn.Linear(hid_dim, num_cls), | nn.Linear(hid_dim, num_cls), | ||||
) | ) | ||||
def forward(self, x): | def forward(self, x): | ||||
h = self.fc(x) | h = self.fc(x) | ||||
return h | return h | ||||
@@ -95,7 +95,7 @@ class _NLICls(nn.Module): | |||||
nn.Dropout(dropout), | nn.Dropout(dropout), | ||||
nn.Linear(hid_dim, num_cls), | nn.Linear(hid_dim, num_cls), | ||||
) | ) | ||||
def forward(self, x1, x2): | def forward(self, x1, x2): | ||||
x = torch.cat([x1, x2, torch.abs(x1 - x2), x1 * x2], 1) | x = torch.cat([x1, x2, torch.abs(x1 - x2), x1 * x2], 1) | ||||
h = self.fc(x) | h = self.fc(x) | ||||
@@ -121,7 +121,7 @@ class STSeqLabel(nn.Module): | |||||
:param emb_dropout: 词嵌入的dropout概率. Default: 0.1 | :param emb_dropout: 词嵌入的dropout概率. Default: 0.1 | ||||
:param dropout: 模型除词嵌入外的dropout概率. Default: 0.1 | :param dropout: 模型除词嵌入外的dropout概率. Default: 0.1 | ||||
""" | """ | ||||
def __init__(self, init_embed, num_cls, | def __init__(self, init_embed, num_cls, | ||||
hidden_size=300, | hidden_size=300, | ||||
num_layers=4, | num_layers=4, | ||||
@@ -141,7 +141,7 @@ class STSeqLabel(nn.Module): | |||||
emb_dropout=emb_dropout, | emb_dropout=emb_dropout, | ||||
dropout=dropout) | dropout=dropout) | ||||
self.cls = _Cls(hidden_size, num_cls, cls_hidden_size) | self.cls = _Cls(hidden_size, num_cls, cls_hidden_size) | ||||
def forward(self, words, seq_len): | def forward(self, words, seq_len): | ||||
""" | """ | ||||
@@ -154,7 +154,7 @@ class STSeqLabel(nn.Module): | |||||
output = self.cls(nodes) | output = self.cls(nodes) | ||||
output = output.transpose(1, 2) # make hidden to be dim 1 | output = output.transpose(1, 2) # make hidden to be dim 1 | ||||
return {Const.OUTPUT: output} # [bsz, n_cls, seq_len] | return {Const.OUTPUT: output} # [bsz, n_cls, seq_len] | ||||
def predict(self, words, seq_len): | def predict(self, words, seq_len): | ||||
""" | """ | ||||
@@ -186,7 +186,7 @@ class STSeqCls(nn.Module): | |||||
:param emb_dropout: 词嵌入的dropout概率. Default: 0.1 | :param emb_dropout: 词嵌入的dropout概率. Default: 0.1 | ||||
:param dropout: 模型除词嵌入外的dropout概率. Default: 0.1 | :param dropout: 模型除词嵌入外的dropout概率. Default: 0.1 | ||||
""" | """ | ||||
def __init__(self, init_embed, num_cls, | def __init__(self, init_embed, num_cls, | ||||
hidden_size=300, | hidden_size=300, | ||||
num_layers=4, | num_layers=4, | ||||
@@ -206,7 +206,7 @@ class STSeqCls(nn.Module): | |||||
emb_dropout=emb_dropout, | emb_dropout=emb_dropout, | ||||
dropout=dropout) | dropout=dropout) | ||||
self.cls = _Cls(hidden_size, num_cls, cls_hidden_size, dropout=dropout) | self.cls = _Cls(hidden_size, num_cls, cls_hidden_size, dropout=dropout) | ||||
def forward(self, words, seq_len): | def forward(self, words, seq_len): | ||||
""" | """ | ||||
@@ -219,7 +219,7 @@ class STSeqCls(nn.Module): | |||||
y = 0.5 * (relay + nodes.max(1)[0]) | y = 0.5 * (relay + nodes.max(1)[0]) | ||||
output = self.cls(y) # [bsz, n_cls] | output = self.cls(y) # [bsz, n_cls] | ||||
return {Const.OUTPUT: output} | return {Const.OUTPUT: output} | ||||
def predict(self, words, seq_len): | def predict(self, words, seq_len): | ||||
""" | """ | ||||
@@ -251,7 +251,7 @@ class STNLICls(nn.Module): | |||||
:param emb_dropout: 词嵌入的dropout概率. Default: 0.1 | :param emb_dropout: 词嵌入的dropout概率. Default: 0.1 | ||||
:param dropout: 模型除词嵌入外的dropout概率. Default: 0.1 | :param dropout: 模型除词嵌入外的dropout概率. Default: 0.1 | ||||
""" | """ | ||||
def __init__(self, init_embed, num_cls, | def __init__(self, init_embed, num_cls, | ||||
hidden_size=300, | hidden_size=300, | ||||
num_layers=4, | num_layers=4, | ||||
@@ -271,7 +271,7 @@ class STNLICls(nn.Module): | |||||
emb_dropout=emb_dropout, | emb_dropout=emb_dropout, | ||||
dropout=dropout) | dropout=dropout) | ||||
self.cls = _NLICls(hidden_size, num_cls, cls_hidden_size) | self.cls = _NLICls(hidden_size, num_cls, cls_hidden_size) | ||||
def forward(self, words1, words2, seq_len1, seq_len2): | def forward(self, words1, words2, seq_len1, seq_len2): | ||||
""" | """ | ||||
@@ -283,16 +283,16 @@ class STNLICls(nn.Module): | |||||
""" | """ | ||||
mask1 = seq_len_to_mask(seq_len1) | mask1 = seq_len_to_mask(seq_len1) | ||||
mask2 = seq_len_to_mask(seq_len2) | mask2 = seq_len_to_mask(seq_len2) | ||||
def enc(seq, mask): | def enc(seq, mask): | ||||
nodes, relay = self.enc(seq, mask) | nodes, relay = self.enc(seq, mask) | ||||
return 0.5 * (relay + nodes.max(1)[0]) | return 0.5 * (relay + nodes.max(1)[0]) | ||||
y1 = enc(words1, mask1) | y1 = enc(words1, mask1) | ||||
y2 = enc(words2, mask2) | y2 = enc(words2, mask2) | ||||
output = self.cls(y1, y2) # [bsz, n_cls] | output = self.cls(y1, y2) # [bsz, n_cls] | ||||
return {Const.OUTPUT: output} | return {Const.OUTPUT: output} | ||||
def predict(self, words1, words2, seq_len1, seq_len2): | def predict(self, words1, words2, seq_len1, seq_len2): | ||||
""" | """ | ||||
@@ -1,45 +1,52 @@ | |||||
""" | """ | ||||
大部分用于的 NLP 任务神经网络都可以看做由编码 :mod:`~fastNLP.modules.encoder` 、 | |||||
解码 :mod:`~fastNLP.modules.decoder` 两种模块组成。 | |||||
.. image:: figures/text_classification.png | .. image:: figures/text_classification.png | ||||
:mod:`~fastNLP.modules` 中实现了 fastNLP 提供的诸多模块组件,可以帮助用户快速搭建自己所需的网络。 | |||||
两种模块的功能和常见组件如下: | |||||
大部分用于 NLP 任务的神经网络都可以看做由 :mod:`embedding<fastNLP.embeddings>` 、 :mod:`~fastNLP.modules.encoder` 、 | |||||
:mod:`~fastNLP.modules.decoder` 三种模块组成。 本模块中实现了 fastNLP 提供的诸多模块组件, | |||||
可以帮助用户快速搭建自己所需的网络。几种模块的功能和常见组件如下: | |||||
.. csv-table:: | |||||
:header: "类型", "功能", "常见组件" | |||||
"embedding", 参见 :doc:`/fastNLP.embeddings` , "Elmo, Bert" | |||||
"encoder", "将输入编码为具有表示能力的向量", "CNN, LSTM, Transformer" | |||||
"decoder", "将具有某种表示意义的向量解码为需要的输出形式 ", "MLP, CRF" | |||||
"其它", "配合其它组件使用的组件", "Dropout" | |||||
+-----------------------+-----------------------+-----------------------+ | |||||
| module type | functionality | example | | |||||
+=======================+=======================+=======================+ | |||||
| encoder | 将输入编码为具有具 | embedding, RNN, CNN, | | |||||
| | 有表示能力的向量 | transformer | | |||||
+-----------------------+-----------------------+-----------------------+ | |||||
| decoder | 将具有某种表示意义的 | MLP, CRF | | |||||
| | 向量解码为需要的输出 | | | |||||
| | 形式 | | | |||||
+-----------------------+-----------------------+-----------------------+ | |||||
""" | """ | ||||
__all__ = [ | __all__ = [ | ||||
# "BertModel", | # "BertModel", | ||||
"ConvolutionCharEncoder", | "ConvolutionCharEncoder", | ||||
"LSTMCharEncoder", | "LSTMCharEncoder", | ||||
"ConvMaxpool", | "ConvMaxpool", | ||||
"LSTM", | "LSTM", | ||||
"StarTransformer", | "StarTransformer", | ||||
"TransformerEncoder", | "TransformerEncoder", | ||||
"VarRNN", | "VarRNN", | ||||
"VarLSTM", | "VarLSTM", | ||||
"VarGRU", | "VarGRU", | ||||
"MaxPool", | "MaxPool", | ||||
"MaxPoolWithMask", | "MaxPoolWithMask", | ||||
"AvgPool", | "AvgPool", | ||||
"AvgPoolWithMask", | |||||
"MultiHeadAttention", | "MultiHeadAttention", | ||||
"MLP", | "MLP", | ||||
"ConditionalRandomField", | "ConditionalRandomField", | ||||
"viterbi_decode", | "viterbi_decode", | ||||
"allowed_transitions", | "allowed_transitions", | ||||
"TimestepDropout", | |||||
] | ] | ||||
from . import decoder | from . import decoder | ||||
@@ -11,7 +11,7 @@ from ..utils import initial_parameter | |||||
def allowed_transitions(id2target, encoding_type='bio', include_start_end=False): | def allowed_transitions(id2target, encoding_type='bio', include_start_end=False): | ||||
""" | """ | ||||
别名::class:`fastNLP.modules.allowed_transitions` :class:`fastNLP.modules.decoder.crf.allowed_transitions` | |||||
别名::class:`fastNLP.modules.allowed_transitions` :class:`fastNLP.modules.decoder.allowed_transitions` | |||||
给定一个id到label的映射表,返回所有可以跳转的(from_tag_id, to_tag_id)列表。 | 给定一个id到label的映射表,返回所有可以跳转的(from_tag_id, to_tag_id)列表。 | ||||
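A worked call with a tiny, invented BIO tag mapping, to show what the returned list contains:

.. code-block:: python

    # allowed_transitions on a toy BIO tag set (the id-to-tag mapping is invented).
    from fastNLP.modules import allowed_transitions

    id2target = {0: "B-PER", 1: "I-PER", 2: "O"}
    pairs = allowed_transitions(id2target, encoding_type="bio", include_start_end=False)
    print(pairs)
    # (0, 1) appears because B-PER may be followed by I-PER;
    # (2, 1) does not, because in BIO an I tag may not directly follow O.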
@@ -31,7 +31,7 @@ def allowed_transitions(id2target, encoding_type='bio', include_start_end=False) | |||||
id_label_lst = list(id2target.items()) | id_label_lst = list(id2target.items()) | ||||
if include_start_end: | if include_start_end: | ||||
id_label_lst += [(start_idx, 'start'), (end_idx, 'end')] | id_label_lst += [(start_idx, 'start'), (end_idx, 'end')] | ||||
def split_tag_label(from_label): | def split_tag_label(from_label): | ||||
from_label = from_label.lower() | from_label = from_label.lower() | ||||
if from_label in ['start', 'end']: | if from_label in ['start', 'end']: | ||||
@@ -41,7 +41,7 @@ def allowed_transitions(id2target, encoding_type='bio', include_start_end=False) | |||||
from_tag = from_label[:1] | from_tag = from_label[:1] | ||||
from_label = from_label[2:] | from_label = from_label[2:] | ||||
return from_tag, from_label | return from_tag, from_label | ||||
for from_id, from_label in id_label_lst: | for from_id, from_label in id_label_lst: | ||||
if from_label in ['<pad>', '<unk>']: | if from_label in ['<pad>', '<unk>']: | ||||
continue | continue | ||||
@@ -93,7 +93,7 @@ def _is_transition_allowed(encoding_type, from_tag, from_label, to_tag, to_label | |||||
return to_tag in ['end', 'b', 'o'] | return to_tag in ['end', 'b', 'o'] | ||||
else: | else: | ||||
raise ValueError("Unexpect tag {}. Expect only 'B', 'I', 'O'.".format(from_tag)) | raise ValueError("Unexpect tag {}. Expect only 'B', 'I', 'O'.".format(from_tag)) | ||||
elif encoding_type == 'bmes': | elif encoding_type == 'bmes': | ||||
""" | """ | ||||
第一行是to_tag, 第一列是from_tag,y任意条件下可转,-只有在label相同时可转,n不可转 | 第一行是to_tag, 第一列是from_tag,y任意条件下可转,-只有在label相同时可转,n不可转 | ||||
@@ -151,7 +151,7 @@ def _is_transition_allowed(encoding_type, from_tag, from_label, to_tag, to_label | |||||
class ConditionalRandomField(nn.Module): | class ConditionalRandomField(nn.Module): | ||||
""" | """ | ||||
别名::class:`fastNLP.modules.ConditionalRandomField` :class:`fastNLP.modules.decoder.crf.ConditionalRandomField` | |||||
别名::class:`fastNLP.modules.ConditionalRandomField` :class:`fastNLP.modules.decoder.ConditionalRandomField` | |||||
条件随机场。 | 条件随机场。 | ||||
提供forward()以及viterbi_decode()两个方法,分别用于训练与inference。 | 提供forward()以及viterbi_decode()两个方法,分别用于训练与inference。 | ||||
@@ -163,21 +163,21 @@ class ConditionalRandomField(nn.Module): | |||||
allowed_transitions()函数得到;如果为None,则所有跃迁均为合法 | allowed_transitions()函数得到;如果为None,则所有跃迁均为合法 | ||||
:param str initial_method: 初始化方法。见initial_parameter | :param str initial_method: 初始化方法。见initial_parameter | ||||
""" | """ | ||||
def __init__(self, num_tags, include_start_end_trans=False, allowed_transitions=None, | def __init__(self, num_tags, include_start_end_trans=False, allowed_transitions=None, | ||||
initial_method=None): | initial_method=None): | ||||
super(ConditionalRandomField, self).__init__() | super(ConditionalRandomField, self).__init__() | ||||
self.include_start_end_trans = include_start_end_trans | self.include_start_end_trans = include_start_end_trans | ||||
self.num_tags = num_tags | self.num_tags = num_tags | ||||
# the meaning of entry in this matrix is (from_tag_id, to_tag_id) score | # the meaning of entry in this matrix is (from_tag_id, to_tag_id) score | ||||
self.trans_m = nn.Parameter(torch.randn(num_tags, num_tags)) | self.trans_m = nn.Parameter(torch.randn(num_tags, num_tags)) | ||||
if self.include_start_end_trans: | if self.include_start_end_trans: | ||||
self.start_scores = nn.Parameter(torch.randn(num_tags)) | self.start_scores = nn.Parameter(torch.randn(num_tags)) | ||||
self.end_scores = nn.Parameter(torch.randn(num_tags)) | self.end_scores = nn.Parameter(torch.randn(num_tags)) | ||||
if allowed_transitions is None: | if allowed_transitions is None: | ||||
constrain = torch.zeros(num_tags + 2, num_tags + 2) | constrain = torch.zeros(num_tags + 2, num_tags + 2) | ||||
else: | else: | ||||
@@ -185,9 +185,9 @@ class ConditionalRandomField(nn.Module): | |||||
for from_tag_id, to_tag_id in allowed_transitions: | for from_tag_id, to_tag_id in allowed_transitions: | ||||
constrain[from_tag_id, to_tag_id] = 0 | constrain[from_tag_id, to_tag_id] = 0 | ||||
self._constrain = nn.Parameter(constrain, requires_grad=False) | self._constrain = nn.Parameter(constrain, requires_grad=False) | ||||
initial_parameter(self, initial_method) | initial_parameter(self, initial_method) | ||||
def _normalizer_likelihood(self, logits, mask): | def _normalizer_likelihood(self, logits, mask): | ||||
"""Computes the (batch_size,) denominator term for the log-likelihood, which is the | """Computes the (batch_size,) denominator term for the log-likelihood, which is the | ||||
sum of the likelihoods across all possible state sequences. | sum of the likelihoods across all possible state sequences. | ||||
@@ -200,21 +200,21 @@ class ConditionalRandomField(nn.Module): | |||||
alpha = logits[0] | alpha = logits[0] | ||||
if self.include_start_end_trans: | if self.include_start_end_trans: | ||||
alpha = alpha + self.start_scores.view(1, -1) | alpha = alpha + self.start_scores.view(1, -1) | ||||
flip_mask = mask.eq(0) | flip_mask = mask.eq(0) | ||||
for i in range(1, seq_len): | for i in range(1, seq_len): | ||||
emit_score = logits[i].view(batch_size, 1, n_tags) | emit_score = logits[i].view(batch_size, 1, n_tags) | ||||
trans_score = self.trans_m.view(1, n_tags, n_tags) | trans_score = self.trans_m.view(1, n_tags, n_tags) | ||||
tmp = alpha.view(batch_size, n_tags, 1) + emit_score + trans_score | tmp = alpha.view(batch_size, n_tags, 1) + emit_score + trans_score | ||||
alpha = torch.logsumexp(tmp, 1).masked_fill(flip_mask[i].view(batch_size, 1), 0) + \ | alpha = torch.logsumexp(tmp, 1).masked_fill(flip_mask[i].view(batch_size, 1), 0) + \ | ||||
alpha.masked_fill(mask[i].byte().view(batch_size, 1), 0) | alpha.masked_fill(mask[i].byte().view(batch_size, 1), 0) | ||||
if self.include_start_end_trans: | if self.include_start_end_trans: | ||||
alpha = alpha + self.end_scores.view(1, -1) | alpha = alpha + self.end_scores.view(1, -1) | ||||
return torch.logsumexp(alpha, 1) | return torch.logsumexp(alpha, 1) | ||||
def _gold_score(self, logits, tags, mask): | def _gold_score(self, logits, tags, mask): | ||||
""" | """ | ||||
Compute the score for the gold path. | Compute the score for the gold path. | ||||
@@ -226,7 +226,7 @@ class ConditionalRandomField(nn.Module): | |||||
seq_len, batch_size, _ = logits.size() | seq_len, batch_size, _ = logits.size() | ||||
batch_idx = torch.arange(batch_size, dtype=torch.long, device=logits.device) | batch_idx = torch.arange(batch_size, dtype=torch.long, device=logits.device) | ||||
seq_idx = torch.arange(seq_len, dtype=torch.long, device=logits.device) | seq_idx = torch.arange(seq_len, dtype=torch.long, device=logits.device) | ||||
# trans_score [L-1, B] | # trans_score [L-1, B] | ||||
mask = mask.byte() | mask = mask.byte() | ||||
flip_mask = mask.eq(0) | flip_mask = mask.eq(0) | ||||
@@ -243,7 +243,7 @@ class ConditionalRandomField(nn.Module): | |||||
score = score + st_scores + ed_scores | score = score + st_scores + ed_scores | ||||
# return [B,] | # return [B,] | ||||
return score | return score | ||||
def forward(self, feats, tags, mask): | def forward(self, feats, tags, mask): | ||||
""" | """ | ||||
用于计算CRF的前向loss,返回值为一个batch_size的FloatTensor,可能需要mean()求得loss。 | 用于计算CRF的前向loss,返回值为一个batch_size的FloatTensor,可能需要mean()求得loss。 | ||||
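A sketch of training-time and decoding-time use of the CRF, consistent with the forward() and viterbi_decode() descriptions in this file; the tag set and tensor shapes are toy values chosen for illustration.

.. code-block:: python

    # Toy use of ConditionalRandomField: loss at training time, Viterbi at inference.
    import torch
    from fastNLP.modules import ConditionalRandomField, allowed_transitions

    num_tags, batch_size, max_len = 5, 2, 6
    id2target = {0: "B-PER", 1: "I-PER", 2: "B-LOC", 3: "I-LOC", 4: "O"}
    constraints = allowed_transitions(id2target, include_start_end=True)
    crf = ConditionalRandomField(num_tags, allowed_transitions=constraints)

    feats = torch.randn(batch_size, max_len, num_tags)   # emission scores from an encoder
    tags = torch.randint(0, num_tags, (batch_size, max_len))
    mask = torch.ones(batch_size, max_len).long()

    loss = crf(feats, tags, mask).mean()                  # forward() returns per-sample losses
    paths, scores = crf.viterbi_decode(feats, mask)       # best tag sequence per sample
    print(loss.item(), paths.shape)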
@@ -258,9 +258,9 @@ class ConditionalRandomField(nn.Module): | |||||
mask = mask.transpose(0, 1).float() | mask = mask.transpose(0, 1).float() | ||||
all_path_score = self._normalizer_likelihood(feats, mask) | all_path_score = self._normalizer_likelihood(feats, mask) | ||||
gold_path_score = self._gold_score(feats, tags, mask) | gold_path_score = self._gold_score(feats, tags, mask) | ||||
return all_path_score - gold_path_score | return all_path_score - gold_path_score | ||||
def viterbi_decode(self, logits, mask, unpad=False): | def viterbi_decode(self, logits, mask, unpad=False): | ||||
"""给定一个特征矩阵以及转移分数矩阵,计算出最佳的路径以及对应的分数 | """给定一个特征矩阵以及转移分数矩阵,计算出最佳的路径以及对应的分数 | ||||
@@ -277,7 +277,7 @@ class ConditionalRandomField(nn.Module): | |||||
batch_size, seq_len, n_tags = logits.size() | batch_size, seq_len, n_tags = logits.size() | ||||
logits = logits.transpose(0, 1).data # L, B, H | logits = logits.transpose(0, 1).data # L, B, H | ||||
mask = mask.transpose(0, 1).data.byte() # L, B | mask = mask.transpose(0, 1).data.byte() # L, B | ||||
# dp | # dp | ||||
vpath = logits.new_zeros((seq_len, batch_size, n_tags), dtype=torch.long) | vpath = logits.new_zeros((seq_len, batch_size, n_tags), dtype=torch.long) | ||||
vscore = logits[0] | vscore = logits[0] | ||||
@@ -286,7 +286,7 @@ class ConditionalRandomField(nn.Module): | |||||
if self.include_start_end_trans: | if self.include_start_end_trans: | ||||
transitions[n_tags, :n_tags] += self.start_scores.data | transitions[n_tags, :n_tags] += self.start_scores.data | ||||
transitions[:n_tags, n_tags + 1] += self.end_scores.data | transitions[:n_tags, n_tags + 1] += self.end_scores.data | ||||
vscore += transitions[n_tags, :n_tags] | vscore += transitions[n_tags, :n_tags] | ||||
trans_score = transitions[:n_tags, :n_tags].view(1, n_tags, n_tags).data | trans_score = transitions[:n_tags, :n_tags].view(1, n_tags, n_tags).data | ||||
for i in range(1, seq_len): | for i in range(1, seq_len): | ||||
@@ -297,17 +297,17 @@ class ConditionalRandomField(nn.Module): | |||||
vpath[i] = best_dst | vpath[i] = best_dst | ||||
vscore = best_score.masked_fill(mask[i].eq(0).view(batch_size, 1), 0) + \ | vscore = best_score.masked_fill(mask[i].eq(0).view(batch_size, 1), 0) + \ | ||||
vscore.masked_fill(mask[i].view(batch_size, 1), 0) | vscore.masked_fill(mask[i].view(batch_size, 1), 0) | ||||
if self.include_start_end_trans: | if self.include_start_end_trans: | ||||
vscore += transitions[:n_tags, n_tags + 1].view(1, -1) | vscore += transitions[:n_tags, n_tags + 1].view(1, -1) | ||||
# backtrace | # backtrace | ||||
batch_idx = torch.arange(batch_size, dtype=torch.long, device=logits.device) | batch_idx = torch.arange(batch_size, dtype=torch.long, device=logits.device) | ||||
seq_idx = torch.arange(seq_len, dtype=torch.long, device=logits.device) | seq_idx = torch.arange(seq_len, dtype=torch.long, device=logits.device) | ||||
lens = (mask.long().sum(0) - 1) | lens = (mask.long().sum(0) - 1) | ||||
# idxes [L, B], batched idx from seq_len-1 to 0 | # idxes [L, B], batched idx from seq_len-1 to 0 | ||||
idxes = (lens.view(1, -1) - seq_idx.view(-1, 1)) % seq_len | idxes = (lens.view(1, -1) - seq_idx.view(-1, 1)) % seq_len | ||||
ans = logits.new_empty((seq_len, batch_size), dtype=torch.long) | ans = logits.new_empty((seq_len, batch_size), dtype=torch.long) | ||||
ans_score, last_tags = vscore.max(1) | ans_score, last_tags = vscore.max(1) | ||||
ans[idxes[0], batch_idx] = last_tags | ans[idxes[0], batch_idx] = last_tags | ||||
@@ -10,7 +10,7 @@ from ..utils import initial_parameter | |||||
class MLP(nn.Module): | class MLP(nn.Module): | ||||
""" | """ | ||||
别名::class:`fastNLP.modules.MLP` :class:`fastNLP.modules.decoder.mlp.MLP` | |||||
别名::class:`fastNLP.modules.MLP` :class:`fastNLP.modules.decoder.MLP` | |||||
多层感知器 | 多层感知器 | ||||
@@ -40,7 +40,7 @@ class MLP(nn.Module): | |||||
>>> print(x) | >>> print(x) | ||||
>>> print(y) | >>> print(y) | ||||
""" | """ | ||||
def __init__(self, size_layer, activation='relu', output_activation=None, initial_method=None, dropout=0.0): | def __init__(self, size_layer, activation='relu', output_activation=None, initial_method=None, dropout=0.0): | ||||
super(MLP, self).__init__() | super(MLP, self).__init__() | ||||
self.hiddens = nn.ModuleList() | self.hiddens = nn.ModuleList() | ||||
@@ -51,9 +51,9 @@ class MLP(nn.Module): | |||||
self.output = nn.Linear(size_layer[i - 1], size_layer[i]) | self.output = nn.Linear(size_layer[i - 1], size_layer[i]) | ||||
else: | else: | ||||
self.hiddens.append(nn.Linear(size_layer[i - 1], size_layer[i])) | self.hiddens.append(nn.Linear(size_layer[i - 1], size_layer[i])) | ||||
self.dropout = nn.Dropout(p=dropout) | self.dropout = nn.Dropout(p=dropout) | ||||
actives = { | actives = { | ||||
'relu': nn.ReLU(), | 'relu': nn.ReLU(), | ||||
'tanh': nn.Tanh(), | 'tanh': nn.Tanh(), | ||||
@@ -82,7 +82,7 @@ class MLP(nn.Module): | |||||
else: | else: | ||||
raise ValueError("should set activation correctly: {}".format(activation)) | raise ValueError("should set activation correctly: {}".format(activation)) | ||||
initial_parameter(self, initial_method) | initial_parameter(self, initial_method) | ||||
def forward(self, x): | def forward(self, x): | ||||
""" | """ | ||||
:param torch.Tensor x: the input to the MLP | :param torch.Tensor x: the input to the MLP | ||||
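The doctest above is truncated to two print calls; a slightly fuller sketch of how the constructor shown in this hunk is typically used (import path taken from the alias line)::

    import torch
    from fastNLP.modules import MLP

    # size_layer [10, 20, 5]: one hidden layer 10 -> 20, then an output layer 20 -> 5
    mlp = MLP([10, 20, 5], activation='relu', dropout=0.1)
    x = torch.randn(4, 10)
    y = mlp(x)
    print(y.shape)      # torch.Size([4, 5])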
@@ -6,7 +6,7 @@ import torch | |||||
def viterbi_decode(logits, transitions, mask=None, unpad=False): | def viterbi_decode(logits, transitions, mask=None, unpad=False): | ||||
r""" | r""" | ||||
Alias: :class:`fastNLP.modules.viterbi_decode` :class:`fastNLP.modules.decoder.utils.viterbi_decode` | |||||
Alias: :class:`fastNLP.modules.viterbi_decode` :class:`fastNLP.modules.decoder.viterbi_decode` | |||||
Given a feature matrix and a transition score matrix, compute the best path and the corresponding score | Given a feature matrix and a transition score matrix, compute the best path and the corresponding score | ||||
@@ -30,11 +30,11 @@ def viterbi_decode(logits, transitions, mask=None, unpad=False): | |||||
mask = mask.transpose(0, 1).data.byte() # L, B | mask = mask.transpose(0, 1).data.byte() # L, B | ||||
else: | else: | ||||
mask = logits.new_ones((seq_len, batch_size), dtype=torch.uint8) | mask = logits.new_ones((seq_len, batch_size), dtype=torch.uint8) | ||||
# dp | # dp | ||||
vpath = logits.new_zeros((seq_len, batch_size, n_tags), dtype=torch.long) | vpath = logits.new_zeros((seq_len, batch_size, n_tags), dtype=torch.long) | ||||
vscore = logits[0] | vscore = logits[0] | ||||
trans_score = transitions.view(1, n_tags, n_tags).data | trans_score = transitions.view(1, n_tags, n_tags).data | ||||
for i in range(1, seq_len): | for i in range(1, seq_len): | ||||
prev_score = vscore.view(batch_size, n_tags, 1) | prev_score = vscore.view(batch_size, n_tags, 1) | ||||
@@ -44,14 +44,14 @@ def viterbi_decode(logits, transitions, mask=None, unpad=False): | |||||
vpath[i] = best_dst | vpath[i] = best_dst | ||||
vscore = best_score.masked_fill(mask[i].eq(0).view(batch_size, 1), 0) + \ | vscore = best_score.masked_fill(mask[i].eq(0).view(batch_size, 1), 0) + \ | ||||
vscore.masked_fill(mask[i].view(batch_size, 1), 0) | vscore.masked_fill(mask[i].view(batch_size, 1), 0) | ||||
# backtrace | # backtrace | ||||
batch_idx = torch.arange(batch_size, dtype=torch.long, device=logits.device) | batch_idx = torch.arange(batch_size, dtype=torch.long, device=logits.device) | ||||
seq_idx = torch.arange(seq_len, dtype=torch.long, device=logits.device) | seq_idx = torch.arange(seq_len, dtype=torch.long, device=logits.device) | ||||
lens = (mask.long().sum(0) - 1) | lens = (mask.long().sum(0) - 1) | ||||
# idxes [L, B], batched idx from seq_len-1 to 0 | # idxes [L, B], batched idx from seq_len-1 to 0 | ||||
idxes = (lens.view(1, -1) - seq_idx.view(-1, 1)) % seq_len | idxes = (lens.view(1, -1) - seq_idx.view(-1, 1)) % seq_len | ||||
ans = logits.new_empty((seq_len, batch_size), dtype=torch.long) | ans = logits.new_empty((seq_len, batch_size), dtype=torch.long) | ||||
ans_score, last_tags = vscore.max(1) | ans_score, last_tags = vscore.max(1) | ||||
ans[idxes[0], batch_idx] = last_tags | ans[idxes[0], batch_idx] = last_tags | ||||
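A short sketch of calling the standalone viterbi_decode shown above; the alias line suggests it is importable from fastNLP.modules, and the (paths, scores) return order is an assumption::

    import torch
    from fastNLP.modules import viterbi_decode

    batch_size, seq_len, n_tags = 2, 6, 4
    logits = torch.randn(batch_size, seq_len, n_tags)
    transitions = torch.randn(n_tags, n_tags)        # transitions[i, j]: score of moving from tag i to tag j
    mask = torch.ones(batch_size, seq_len).byte()
    mask[1, 4:] = 0                                   # second sequence has only 4 real tokens

    paths, scores = viterbi_decode(logits, transitions, mask=mask, unpad=True)
    print(paths)                                      # with unpad=True: one list of tag ids per sequence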
@@ -5,10 +5,8 @@ import torch | |||||
class TimestepDropout(torch.nn.Dropout): | class TimestepDropout(torch.nn.Dropout): | ||||
""" | """ | ||||
Alias: :class:`fastNLP.modules.TimestepDropout` | |||||
Accepts input of shape ``[batch_size, num_timesteps, embedding_dim]`` and applies dropout with the same mask (shape ``(batch_size, embedding_dim)``) | |||||
at every timestep. | |||||
The input is expected to have shape ``(batch_size, num_timesteps, embedding_dim)``. | |||||
Dropout is applied at every timestep with one shared mask of shape ``(batch_size, embedding_dim)``. | |||||
""" | """ | ||||
def forward(self, x): | def forward(self, x): | ||||
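A small sketch of the behaviour described by the reworded docstring: one dropout mask per example, reused across all timesteps (assuming the class is importable from fastNLP.modules.dropout, the module path implied by the relative import in a later hunk)::

    import torch
    from fastNLP.modules.dropout import TimestepDropout

    drop = TimestepDropout(p=0.5)
    drop.train()
    x = torch.ones(2, 4, 8)                 # (batch_size, num_timesteps, embedding_dim)
    y = drop(x)
    print(torch.equal(y[:, 0], y[:, 1]))    # True: the same dimensions are dropped at every timestep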
@@ -1,17 +1,17 @@ | |||||
__all__ = [ | __all__ = [ | ||||
"BertModel", | |||||
# "BertModel", | |||||
"ConvolutionCharEncoder", | "ConvolutionCharEncoder", | ||||
"LSTMCharEncoder", | "LSTMCharEncoder", | ||||
"ConvMaxpool", | "ConvMaxpool", | ||||
"LSTM", | "LSTM", | ||||
"StarTransformer", | "StarTransformer", | ||||
"TransformerEncoder", | "TransformerEncoder", | ||||
"VarRNN", | "VarRNN", | ||||
"VarLSTM", | "VarLSTM", | ||||
"VarGRU", | "VarGRU", | ||||
@@ -8,8 +8,6 @@ import torch | |||||
import torch.nn.functional as F | import torch.nn.functional as F | ||||
from torch import nn | from torch import nn | ||||
from fastNLP.modules.dropout import TimestepDropout | |||||
from fastNLP.modules.utils import initial_parameter | from fastNLP.modules.utils import initial_parameter | ||||
@@ -18,7 +16,7 @@ class DotAttention(nn.Module): | |||||
.. todo:: | .. todo:: | ||||
Documentation to be added | Documentation to be added | ||||
""" | """ | ||||
def __init__(self, key_size, value_size, dropout=0.0): | def __init__(self, key_size, value_size, dropout=0.0): | ||||
super(DotAttention, self).__init__() | super(DotAttention, self).__init__() | ||||
self.key_size = key_size | self.key_size = key_size | ||||
@@ -26,7 +24,7 @@ class DotAttention(nn.Module): | |||||
self.scale = math.sqrt(key_size) | self.scale = math.sqrt(key_size) | ||||
self.drop = nn.Dropout(dropout) | self.drop = nn.Dropout(dropout) | ||||
self.softmax = nn.Softmax(dim=2) | self.softmax = nn.Softmax(dim=2) | ||||
def forward(self, Q, K, V, mask_out=None): | def forward(self, Q, K, V, mask_out=None): | ||||
""" | """ | ||||
@@ -45,7 +43,7 @@ class DotAttention(nn.Module): | |||||
class MultiHeadAttention(nn.Module): | class MultiHeadAttention(nn.Module): | ||||
""" | """ | ||||
Alias: :class:`fastNLP.modules.MultiHeadAttention` :class:`fastNLP.modules.encoder.attention.MultiHeadAttention` | |||||
Alias: :class:`fastNLP.modules.MultiHeadAttention` :class:`fastNLP.modules.encoder.MultiHeadAttention` | |||||
:param input_size: int, size of the input dimension; this is also the size of the output dimension. | :param input_size: int, size of the input dimension; this is also the size of the output dimension. | ||||
:param key_size: int, dimension of each head. | :param key_size: int, dimension of each head. | ||||
@@ -53,14 +51,14 @@ class MultiHeadAttention(nn.Module): | |||||
:param num_head: int, number of heads. | :param num_head: int, number of heads. | ||||
:param dropout: float. | :param dropout: float. | ||||
""" | """ | ||||
def __init__(self, input_size, key_size, value_size, num_head, dropout=0.1): | def __init__(self, input_size, key_size, value_size, num_head, dropout=0.1): | ||||
super(MultiHeadAttention, self).__init__() | super(MultiHeadAttention, self).__init__() | ||||
self.input_size = input_size | self.input_size = input_size | ||||
self.key_size = key_size | self.key_size = key_size | ||||
self.value_size = value_size | self.value_size = value_size | ||||
self.num_head = num_head | self.num_head = num_head | ||||
in_size = key_size * num_head | in_size = key_size * num_head | ||||
self.q_in = nn.Linear(input_size, in_size) | self.q_in = nn.Linear(input_size, in_size) | ||||
self.k_in = nn.Linear(input_size, in_size) | self.k_in = nn.Linear(input_size, in_size) | ||||
@@ -69,14 +67,14 @@ class MultiHeadAttention(nn.Module): | |||||
self.attention = DotAttention(key_size=key_size, value_size=value_size, dropout=dropout) | self.attention = DotAttention(key_size=key_size, value_size=value_size, dropout=dropout) | ||||
self.out = nn.Linear(value_size * num_head, input_size) | self.out = nn.Linear(value_size * num_head, input_size) | ||||
self.reset_parameters() | self.reset_parameters() | ||||
def reset_parameters(self): | def reset_parameters(self): | ||||
sqrt = math.sqrt | sqrt = math.sqrt | ||||
nn.init.normal_(self.q_in.weight, mean=0, std=sqrt(2.0 / (self.input_size + self.key_size))) | nn.init.normal_(self.q_in.weight, mean=0, std=sqrt(2.0 / (self.input_size + self.key_size))) | ||||
nn.init.normal_(self.k_in.weight, mean=0, std=sqrt(2.0 / (self.input_size + self.key_size))) | nn.init.normal_(self.k_in.weight, mean=0, std=sqrt(2.0 / (self.input_size + self.key_size))) | ||||
nn.init.normal_(self.v_in.weight, mean=0, std=sqrt(2.0 / (self.input_size + self.value_size))) | nn.init.normal_(self.v_in.weight, mean=0, std=sqrt(2.0 / (self.input_size + self.value_size))) | ||||
nn.init.xavier_normal_(self.out.weight) | nn.init.xavier_normal_(self.out.weight) | ||||
def forward(self, Q, K, V, atte_mask_out=None): | def forward(self, Q, K, V, atte_mask_out=None): | ||||
""" | """ | ||||
@@ -92,7 +90,7 @@ class MultiHeadAttention(nn.Module): | |||||
q = self.q_in(Q).view(batch, sq, n_head, d_k) | q = self.q_in(Q).view(batch, sq, n_head, d_k) | ||||
k = self.k_in(K).view(batch, sk, n_head, d_k) | k = self.k_in(K).view(batch, sk, n_head, d_k) | ||||
v = self.v_in(V).view(batch, sk, n_head, d_v) | v = self.v_in(V).view(batch, sk, n_head, d_v) | ||||
# transpose q, k and v to do batch attention | # transpose q, k and v to do batch attention | ||||
q = q.permute(2, 0, 1, 3).contiguous().view(-1, sq, d_k) | q = q.permute(2, 0, 1, 3).contiguous().view(-1, sq, d_k) | ||||
k = k.permute(2, 0, 1, 3).contiguous().view(-1, sk, d_k) | k = k.permute(2, 0, 1, 3).contiguous().view(-1, sk, d_k) | ||||
@@ -100,7 +98,7 @@ class MultiHeadAttention(nn.Module): | |||||
if atte_mask_out is not None: | if atte_mask_out is not None: | ||||
atte_mask_out = atte_mask_out.repeat(n_head, 1, 1) | atte_mask_out = atte_mask_out.repeat(n_head, 1, 1) | ||||
atte = self.attention(q, k, v, atte_mask_out).view(n_head, batch, sq, d_v) | atte = self.attention(q, k, v, atte_mask_out).view(n_head, batch, sq, d_v) | ||||
# concat all heads, do output linear | # concat all heads, do output linear | ||||
atte = atte.permute(1, 2, 0, 3).contiguous().view(batch, sq, -1) | atte = atte.permute(1, 2, 0, 3).contiguous().view(batch, sq, -1) | ||||
output = self.out(atte) | output = self.out(atte) | ||||
@@ -124,11 +122,11 @@ class BiAttention(nn.Module): | |||||
\end{array} | \end{array} | ||||
""" | """ | ||||
def __init__(self): | def __init__(self): | ||||
super(BiAttention, self).__init__() | super(BiAttention, self).__init__() | ||||
self.inf = 10e12 | self.inf = 10e12 | ||||
def forward(self, in_x1, in_x2, x1_len, x2_len): | def forward(self, in_x1, in_x2, x1_len, x2_len): | ||||
""" | """ | ||||
:param torch.Tensor in_x1: [batch_size, x1_seq_len, hidden_size] feature representation of the first sentence | :param torch.Tensor in_x1: [batch_size, x1_seq_len, hidden_size] feature representation of the first sentence | ||||
@@ -139,36 +137,36 @@ class BiAttention(nn.Module): | |||||
torch.Tensor out_x2: [batch_size, x2_seq_len, hidden_size] attended feature representation for the second sentence | torch.Tensor out_x2: [batch_size, x2_seq_len, hidden_size] attended feature representation for the second sentence | ||||
""" | """ | ||||
assert in_x1.size()[0] == in_x2.size()[0] | assert in_x1.size()[0] == in_x2.size()[0] | ||||
assert in_x1.size()[2] == in_x2.size()[2] | assert in_x1.size()[2] == in_x2.size()[2] | ||||
# The batch size and hidden size must be equal. | # The batch size and hidden size must be equal. | ||||
assert in_x1.size()[1] == x1_len.size()[1] and in_x2.size()[1] == x2_len.size()[1] | assert in_x1.size()[1] == x1_len.size()[1] and in_x2.size()[1] == x2_len.size()[1] | ||||
# The seq len in in_x and x_len must be equal. | # The seq len in in_x and x_len must be equal. | ||||
assert in_x1.size()[0] == x1_len.size()[0] and x1_len.size()[0] == x2_len.size()[0] | assert in_x1.size()[0] == x1_len.size()[0] and x1_len.size()[0] == x2_len.size()[0] | ||||
batch_size = in_x1.size()[0] | batch_size = in_x1.size()[0] | ||||
x1_max_len = in_x1.size()[1] | x1_max_len = in_x1.size()[1] | ||||
x2_max_len = in_x2.size()[1] | x2_max_len = in_x2.size()[1] | ||||
in_x2_t = torch.transpose(in_x2, 1, 2) # [batch_size, hidden_size, x2_seq_len] | in_x2_t = torch.transpose(in_x2, 1, 2) # [batch_size, hidden_size, x2_seq_len] | ||||
attention_matrix = torch.bmm(in_x1, in_x2_t) # [batch_size, x1_seq_len, x2_seq_len] | attention_matrix = torch.bmm(in_x1, in_x2_t) # [batch_size, x1_seq_len, x2_seq_len] | ||||
a_mask = x1_len.le(0.5).float() * -self.inf # [batch_size, x1_seq_len] | a_mask = x1_len.le(0.5).float() * -self.inf # [batch_size, x1_seq_len] | ||||
a_mask = a_mask.view(batch_size, x1_max_len, -1) | a_mask = a_mask.view(batch_size, x1_max_len, -1) | ||||
a_mask = a_mask.expand(-1, -1, x2_max_len) # [batch_size, x1_seq_len, x2_seq_len] | a_mask = a_mask.expand(-1, -1, x2_max_len) # [batch_size, x1_seq_len, x2_seq_len] | ||||
b_mask = x2_len.le(0.5).float() * -self.inf | b_mask = x2_len.le(0.5).float() * -self.inf | ||||
b_mask = b_mask.view(batch_size, -1, x2_max_len) | b_mask = b_mask.view(batch_size, -1, x2_max_len) | ||||
b_mask = b_mask.expand(-1, x1_max_len, -1) # [batch_size, x1_seq_len, x2_seq_len] | b_mask = b_mask.expand(-1, x1_max_len, -1) # [batch_size, x1_seq_len, x2_seq_len] | ||||
attention_a = F.softmax(attention_matrix + a_mask, dim=2) # [batch_size, x1_seq_len, x2_seq_len] | attention_a = F.softmax(attention_matrix + a_mask, dim=2) # [batch_size, x1_seq_len, x2_seq_len] | ||||
attention_b = F.softmax(attention_matrix + b_mask, dim=1) # [batch_size, x1_seq_len, x2_seq_len] | attention_b = F.softmax(attention_matrix + b_mask, dim=1) # [batch_size, x1_seq_len, x2_seq_len] | ||||
out_x1 = torch.bmm(attention_a, in_x2) # [batch_size, x1_seq_len, hidden_size] | out_x1 = torch.bmm(attention_a, in_x2) # [batch_size, x1_seq_len, hidden_size] | ||||
attention_b_t = torch.transpose(attention_b, 1, 2) | attention_b_t = torch.transpose(attention_b, 1, 2) | ||||
out_x2 = torch.bmm(attention_b_t, in_x1) # [batch_size, x2_seq_len, hidden_size] | out_x2 = torch.bmm(attention_b_t, in_x1) # [batch_size, x2_seq_len, hidden_size] | ||||
return out_x1, out_x2 | return out_x1, out_x2 | ||||
@@ -182,10 +180,10 @@ class SelfAttention(nn.Module): | |||||
:param float drop: dropout probability, default 0.5 | :param float drop: dropout probability, default 0.5 | ||||
:param str initial_method: parameter initialization method | :param str initial_method: parameter initialization method | ||||
""" | """ | ||||
def __init__(self, input_size, attention_unit=300, attention_hops=10, drop=0.5, initial_method=None, ): | def __init__(self, input_size, attention_unit=300, attention_hops=10, drop=0.5, initial_method=None, ): | ||||
super(SelfAttention, self).__init__() | super(SelfAttention, self).__init__() | ||||
self.attention_hops = attention_hops | self.attention_hops = attention_hops | ||||
self.ws1 = nn.Linear(input_size, attention_unit, bias=False) | self.ws1 = nn.Linear(input_size, attention_unit, bias=False) | ||||
self.ws2 = nn.Linear(attention_unit, attention_hops, bias=False) | self.ws2 = nn.Linear(attention_unit, attention_hops, bias=False) | ||||
@@ -194,7 +192,7 @@ class SelfAttention(nn.Module): | |||||
self.drop = nn.Dropout(drop) | self.drop = nn.Dropout(drop) | ||||
self.tanh = nn.Tanh() | self.tanh = nn.Tanh() | ||||
initial_parameter(self, initial_method) | initial_parameter(self, initial_method) | ||||
def _penalization(self, attention): | def _penalization(self, attention): | ||||
""" | """ | ||||
compute the penalization term for attention module | compute the penalization term for attention module | ||||
@@ -208,7 +206,7 @@ class SelfAttention(nn.Module): | |||||
mat = torch.bmm(attention, attention_t) - self.I[:attention.size(0)] | mat = torch.bmm(attention, attention_t) - self.I[:attention.size(0)] | ||||
ret = (torch.sum(torch.sum((mat ** 2), 2), 1).squeeze() + 1e-10) ** 0.5 | ret = (torch.sum(torch.sum((mat ** 2), 2), 1).squeeze() + 1e-10) ** 0.5 | ||||
return torch.sum(ret) / size[0] | return torch.sum(ret) / size[0] | ||||
def forward(self, input, input_origin): | def forward(self, input, input_origin): | ||||
""" | """ | ||||
:param torch.Tensor input: [baz, senLen, h_dim] the matrix to compute attention over | :param torch.Tensor input: [baz, senLen, h_dim] the matrix to compute attention over | ||||
@@ -218,14 +216,14 @@ class SelfAttention(nn.Module): | |||||
""" | """ | ||||
input = input.contiguous() | input = input.contiguous() | ||||
size = input.size() # [bsz, len, nhid] | size = input.size() # [bsz, len, nhid] | ||||
input_origin = input_origin.expand(self.attention_hops, -1, -1) # [hops,baz, len] | input_origin = input_origin.expand(self.attention_hops, -1, -1) # [hops,baz, len] | ||||
input_origin = input_origin.transpose(0, 1).contiguous() # [baz, hops,len] | input_origin = input_origin.transpose(0, 1).contiguous() # [baz, hops,len] | ||||
y1 = self.tanh(self.ws1(self.drop(input))) # [baz,len,dim] -->[bsz,len, attention-unit] | y1 = self.tanh(self.ws1(self.drop(input))) # [baz,len,dim] -->[bsz,len, attention-unit] | ||||
attention = self.ws2(y1).transpose(1, 2).contiguous() | attention = self.ws2(y1).transpose(1, 2).contiguous() | ||||
# [bsz,len, attention-unit]--> [bsz, len, hop]--> [baz,hop,len] | # [bsz,len, attention-unit]--> [bsz, len, hop]--> [baz,hop,len] | ||||
attention = attention + (-999999 * (input_origin == 0).float()) # remove the weight on padding token. | attention = attention + (-999999 * (input_origin == 0).float()) # remove the weight on padding token. | ||||
attention = F.softmax(attention, 2) # [baz ,hop, len] | attention = F.softmax(attention, 2) # [baz ,hop, len] | ||||
return torch.bmm(attention, input), self._penalization(attention) # output1 --> [baz ,hop ,nhid] | return torch.bmm(attention, input), self._penalization(attention) # output1 --> [baz ,hop ,nhid] |
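A usage sketch for the MultiHeadAttention module in this file; parameter names come from the constructor shown above, and the import path from the updated alias line. Self-attention is simply the Q = K = V case::

    import torch
    from fastNLP.modules import MultiHeadAttention

    attn = MultiHeadAttention(input_size=64, key_size=16, value_size=16, num_head=4, dropout=0.1)
    x = torch.randn(2, 7, 64)       # (batch, seq_len, input_size)
    out = attn(x, x, x)             # self-attention; the output keeps the input shape
    print(out.shape)                # torch.Size([2, 7, 64])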
@@ -1,11 +1,11 @@ | |||||
""" | """ | ||||
The code on this page is largely based on (copy-pasted from) https://github.com/huggingface/pytorch-pretrained-BERT; if you find this code | The code on this page is largely based on (copy-pasted from) https://github.com/huggingface/pytorch-pretrained-BERT; if you find this code | ||||
useful, please also cite them. | useful, please also cite them. | ||||
""" | """ | ||||
__all__ = [ | |||||
"BertModel" | |||||
] | |||||
import collections | import collections | ||||
@@ -29,6 +29,7 @@ VOCAB_NAME = 'vocab.txt' | |||||
class BertConfig(object): | class BertConfig(object): | ||||
"""Configuration class to store the configuration of a `BertModel`. | """Configuration class to store the configuration of a `BertModel`. | ||||
""" | """ | ||||
def __init__(self, | def __init__(self, | ||||
vocab_size_or_config_json_file, | vocab_size_or_config_json_file, | ||||
hidden_size=768, | hidden_size=768, | ||||
@@ -67,8 +68,7 @@ class BertConfig(object): | |||||
initializing all weight matrices. | initializing all weight matrices. | ||||
layer_norm_eps: The epsilon used by LayerNorm. | layer_norm_eps: The epsilon used by LayerNorm. | ||||
""" | """ | ||||
if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2 | |||||
and isinstance(vocab_size_or_config_json_file, unicode)): | |||||
if isinstance(vocab_size_or_config_json_file, str): | |||||
with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader: | with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader: | ||||
json_config = json.loads(reader.read()) | json_config = json.loads(reader.read()) | ||||
for key, value in json_config.items(): | for key, value in json_config.items(): | ||||
@@ -153,6 +153,7 @@ class BertLayerNorm(nn.Module): | |||||
class BertEmbeddings(nn.Module): | class BertEmbeddings(nn.Module): | ||||
"""Construct the embeddings from word, position and token_type embeddings. | """Construct the embeddings from word, position and token_type embeddings. | ||||
""" | """ | ||||
def __init__(self, config): | def __init__(self, config): | ||||
super(BertEmbeddings, self).__init__() | super(BertEmbeddings, self).__init__() | ||||
self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=0) | self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=0) | ||||
@@ -262,7 +263,7 @@ class BertIntermediate(nn.Module): | |||||
def __init__(self, config): | def __init__(self, config): | ||||
super(BertIntermediate, self).__init__() | super(BertIntermediate, self).__init__() | ||||
self.dense = nn.Linear(config.hidden_size, config.intermediate_size) | self.dense = nn.Linear(config.hidden_size, config.intermediate_size) | ||||
if isinstance(config.hidden_act, str) or (sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)): | |||||
if isinstance(config.hidden_act, str): | |||||
self.intermediate_act_fn = ACT2FN[config.hidden_act] | self.intermediate_act_fn = ACT2FN[config.hidden_act] | ||||
else: | else: | ||||
self.intermediate_act_fn = config.hidden_act | self.intermediate_act_fn = config.hidden_act | ||||
@@ -334,7 +335,10 @@ class BertPooler(nn.Module): | |||||
class BertModel(nn.Module): | class BertModel(nn.Module): | ||||
"""BERT(Bidirectional Embedding Representations from Transformers). | |||||
""" | |||||
Alias: :class:`fastNLP.modules.BertModel` :class:`fastNLP.modules.encoder.BertModel` | |||||
BERT (Bidirectional Encoder Representations from Transformers). | |||||
If you want to use pretrained weight matrices, download them from the following URLs. | If you want to use pretrained weight matrices, download them from the following URLs. | ||||
sources:: | sources:: | ||||
@@ -576,6 +580,7 @@ def load_vocab(vocab_file): | |||||
index += 1 | index += 1 | ||||
return vocab | return vocab | ||||
class BasicTokenizer(object): | class BasicTokenizer(object): | ||||
"""Runs basic tokenization (punctuation splitting, lower casing, etc.).""" | """Runs basic tokenization (punctuation splitting, lower casing, etc.).""" | ||||
@@ -761,8 +766,8 @@ class BertTokenizer(object): | |||||
[(ids, tok) for tok, ids in self.vocab.items()]) | [(ids, tok) for tok, ids in self.vocab.items()]) | ||||
self.do_basic_tokenize = do_basic_tokenize | self.do_basic_tokenize = do_basic_tokenize | ||||
if do_basic_tokenize: | if do_basic_tokenize: | ||||
self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case, | |||||
never_split=never_split) | |||||
self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case, | |||||
never_split=never_split) | |||||
self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab) | self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab) | ||||
self.max_len = max_len if max_len is not None else int(1e12) | self.max_len = max_len if max_len is not None else int(1e12) | ||||
@@ -817,7 +822,7 @@ class BertTokenizer(object): | |||||
for token, token_index in sorted(self.vocab.items(), key=lambda kv: kv[1]): | for token, token_index in sorted(self.vocab.items(), key=lambda kv: kv[1]): | ||||
if index != token_index: | if index != token_index: | ||||
print("Saving vocabulary to {}: vocabulary indices are not consecutive." | print("Saving vocabulary to {}: vocabulary indices are not consecutive." | ||||
" Please check that the vocabulary is not corrupted!".format(vocab_file)) | |||||
" Please check that the vocabulary is not corrupted!".format(vocab_file)) | |||||
index = token_index | index = token_index | ||||
writer.write(token + u'\n') | writer.write(token + u'\n') | ||||
index += 1 | index += 1 | ||||
@@ -837,13 +842,13 @@ class BertTokenizer(object): | |||||
tokenizer = cls(pretrained_model_name_or_path, *inputs, **kwargs) | tokenizer = cls(pretrained_model_name_or_path, *inputs, **kwargs) | ||||
return tokenizer | return tokenizer | ||||
class _WordPieceBertModel(nn.Module): | class _WordPieceBertModel(nn.Module): | ||||
""" | """ | ||||
This module computes results directly at the word_piece level. | This module computes results directly at the word_piece level. | ||||
""" | """ | ||||
def __init__(self, model_dir:str, layers:str='-1'): | |||||
def __init__(self, model_dir: str, layers: str = '-1'): | |||||
super().__init__() | super().__init__() | ||||
self.tokenzier = BertTokenizer.from_pretrained(model_dir) | self.tokenzier = BertTokenizer.from_pretrained(model_dir) | ||||
@@ -852,11 +857,11 @@ class _WordPieceBertModel(nn.Module): | |||||
encoder_layer_number = len(self.encoder.encoder.layer) | encoder_layer_number = len(self.encoder.encoder.layer) | ||||
self.layers = list(map(int, layers.split(','))) | self.layers = list(map(int, layers.split(','))) | ||||
for layer in self.layers: | for layer in self.layers: | ||||
if layer<0: | |||||
assert -layer<=encoder_layer_number, f"The layer index:{layer} is out of scope for " \ | |||||
if layer < 0: | |||||
assert -layer <= encoder_layer_number, f"The layer index:{layer} is out of scope for " \ | |||||
f"a bert model with {encoder_layer_number} layers." | f"a bert model with {encoder_layer_number} layers." | ||||
else: | else: | ||||
assert layer<encoder_layer_number, f"The layer index:{layer} is out of scope for " \ | |||||
assert layer < encoder_layer_number, f"The layer index:{layer} is out of scope for " \ | |||||
f"a bert model with {encoder_layer_number} layers." | f"a bert model with {encoder_layer_number} layers." | ||||
self._cls_index = self.tokenzier.vocab['[CLS]'] | self._cls_index = self.tokenzier.vocab['[CLS]'] | ||||
@@ -872,15 +877,16 @@ class _WordPieceBertModel(nn.Module): | |||||
:param field_name: the field (column) whose content is indexed | :param field_name: the field (column) whose content is indexed | ||||
:return: | :return: | ||||
""" | """ | ||||
def convert_words_to_word_pieces(words): | def convert_words_to_word_pieces(words): | ||||
word_pieces = [] | word_pieces = [] | ||||
for word in words: | for word in words: | ||||
tokens = self.tokenzier.wordpiece_tokenizer.tokenize(word) | tokens = self.tokenzier.wordpiece_tokenizer.tokenize(word) | ||||
word_piece_ids = self.tokenzier.convert_tokens_to_ids(tokens) | word_piece_ids = self.tokenzier.convert_tokens_to_ids(tokens) | ||||
word_pieces.extend(word_piece_ids) | word_pieces.extend(word_piece_ids) | ||||
if word_pieces[0]!=self._cls_index: | |||||
if word_pieces[0] != self._cls_index: | |||||
word_pieces.insert(0, self._cls_index) | word_pieces.insert(0, self._cls_index) | ||||
if word_pieces[-1]!=self._sep_index: | |||||
if word_pieces[-1] != self._sep_index: | |||||
word_pieces.insert(-1, self._sep_index) | word_pieces.insert(-1, self._sep_index) | ||||
return word_pieces | return word_pieces | ||||
@@ -904,10 +910,9 @@ class _WordPieceBertModel(nn.Module): | |||||
attn_masks = word_pieces.ne(self._wordpiece_pad_index) | attn_masks = word_pieces.ne(self._wordpiece_pad_index) | ||||
bert_outputs, _ = self.encoder(word_pieces, token_type_ids=token_type_ids, attention_mask=attn_masks, | bert_outputs, _ = self.encoder(word_pieces, token_type_ids=token_type_ids, attention_mask=attn_masks, | ||||
output_all_encoded_layers=True) | |||||
output_all_encoded_layers=True) | |||||
# output_layers = [self.layers] # len(self.layers) x batch_size x max_word_piece_length x hidden_size | # output_layers = [self.layers] # len(self.layers) x batch_size x max_word_piece_length x hidden_size | ||||
outputs = bert_outputs[0].new_zeros((len(self.layers), batch_size, max_len, bert_outputs[0].size(-1))) | outputs = bert_outputs[0].new_zeros((len(self.layers), batch_size, max_len, bert_outputs[0].size(-1))) | ||||
for l_index, l in enumerate(self.layers): | for l_index, l in enumerate(self.layers): | ||||
outputs[l_index] = bert_outputs[l] | outputs[l_index] = bert_outputs[l] | ||||
return outputs | return outputs | ||||
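The classes in this file keep the pytorch-pretrained-BERT interface they were copied from. A minimal sketch with a deliberately tiny, randomly initialized config; keyword names follow BertConfig.__init__ above, and the module path is an assumption (this diff drops BertModel from the package __all__, so the top-level import may not be exposed)::

    import torch
    from fastNLP.modules.encoder.bert import BertConfig, BertModel

    config = BertConfig(vocab_size_or_config_json_file=100, hidden_size=32,
                        num_hidden_layers=2, num_attention_heads=4, intermediate_size=64)
    model = BertModel(config)

    input_ids = torch.randint(1, 100, (2, 8))
    attention_mask = torch.ones_like(input_ids)
    encoded_layers, pooled = model(input_ids, attention_mask=attention_mask,
                                   output_all_encoded_layers=True)
    print(len(encoded_layers), encoded_layers[-1].shape, pooled.shape)   # 2, (2, 8, 32), (2, 32)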
@@ -11,7 +11,7 @@ from ..utils import initial_parameter | |||||
# from torch.nn.init import xavier_uniform | # from torch.nn.init import xavier_uniform | ||||
class ConvolutionCharEncoder(nn.Module): | class ConvolutionCharEncoder(nn.Module): | ||||
""" | """ | ||||
Alias: :class:`fastNLP.modules.ConvolutionCharEncoder` :class:`fastNLP.modules.encoder.char_encoder.ConvolutionCharEncoder` | |||||
Alias: :class:`fastNLP.modules.ConvolutionCharEncoder` :class:`fastNLP.modules.encoder.ConvolutionCharEncoder` | |||||
Char-level convolutional encoder. | Char-level convolutional encoder. | ||||
@@ -21,15 +21,16 @@ class ConvolutionCharEncoder(nn.Module): | |||||
:param tuple kernels: a tuple of ints; its length is the number of char-level convolutions, and the `i`-th int is the kernel size of the `i`-th convolution. | :param tuple kernels: a tuple of ints; its length is the number of char-level convolutions, and the `i`-th int is the kernel size of the `i`-th convolution. | ||||
:param initial_method: parameter initialization method, defaults to `xavier normal` | :param initial_method: parameter initialization method, defaults to `xavier normal` | ||||
""" | """ | ||||
def __init__(self, char_emb_size=50, feature_maps=(40, 30, 30), kernels=(1, 3, 5), initial_method=None): | def __init__(self, char_emb_size=50, feature_maps=(40, 30, 30), kernels=(1, 3, 5), initial_method=None): | ||||
super(ConvolutionCharEncoder, self).__init__() | super(ConvolutionCharEncoder, self).__init__() | ||||
self.convs = nn.ModuleList([ | self.convs = nn.ModuleList([ | ||||
nn.Conv2d(1, feature_maps[i], kernel_size=(char_emb_size, kernels[i]), bias=True, padding=(0, kernels[i]//2)) | |||||
nn.Conv2d(1, feature_maps[i], kernel_size=(char_emb_size, kernels[i]), bias=True, | |||||
padding=(0, kernels[i] // 2)) | |||||
for i in range(len(kernels))]) | for i in range(len(kernels))]) | ||||
initial_parameter(self, initial_method) | initial_parameter(self, initial_method) | ||||
def forward(self, x): | def forward(self, x): | ||||
""" | """ | ||||
:param torch.Tensor x: ``[batch_size * sent_length, word_length, char_emb_size]`` input character embeddings | :param torch.Tensor x: ``[batch_size * sent_length, word_length, char_emb_size]`` input character embeddings | ||||
@@ -40,7 +41,7 @@ class ConvolutionCharEncoder(nn.Module): | |||||
x = x.transpose(2, 3) | x = x.transpose(2, 3) | ||||
# [batch_size*sent_length, channel, height, width] | # [batch_size*sent_length, channel, height, width] | ||||
return self._convolute(x).unsqueeze(2) | return self._convolute(x).unsqueeze(2) | ||||
def _convolute(self, x): | def _convolute(self, x): | ||||
feats = [] | feats = [] | ||||
for conv in self.convs: | for conv in self.convs: | ||||
@@ -57,13 +58,13 @@ class ConvolutionCharEncoder(nn.Module): | |||||
class LSTMCharEncoder(nn.Module): | class LSTMCharEncoder(nn.Module): | ||||
""" | """ | ||||
Alias: :class:`fastNLP.modules.LSTMCharEncoder` :class:`fastNLP.modules.encoder.char_encoder.LSTMCharEncoder` | |||||
Alias: :class:`fastNLP.modules.LSTMCharEncoder` :class:`fastNLP.modules.encoder.LSTMCharEncoder` | |||||
Char-level LSTM-based encoder. | Char-level LSTM-based encoder. | ||||
""" | """ | ||||
def __init__(self, char_emb_size=50, hidden_size=None, initial_method=None): | def __init__(self, char_emb_size=50, hidden_size=None, initial_method=None): | ||||
""" | """ | ||||
:param int char_emb_size: dimension of the char-level embedding. Default: 50 | :param int char_emb_size: dimension of the char-level embedding. Default: 50 | ||||
@@ -73,14 +74,14 @@ class LSTMCharEncoder(nn.Module): | |||||
""" | """ | ||||
super(LSTMCharEncoder, self).__init__() | super(LSTMCharEncoder, self).__init__() | ||||
self.hidden_size = char_emb_size if hidden_size is None else hidden_size | self.hidden_size = char_emb_size if hidden_size is None else hidden_size | ||||
self.lstm = nn.LSTM(input_size=char_emb_size, | self.lstm = nn.LSTM(input_size=char_emb_size, | ||||
hidden_size=self.hidden_size, | hidden_size=self.hidden_size, | ||||
num_layers=1, | num_layers=1, | ||||
bias=True, | bias=True, | ||||
batch_first=True) | batch_first=True) | ||||
initial_parameter(self, initial_method) | initial_parameter(self, initial_method) | ||||
def forward(self, x): | def forward(self, x): | ||||
""" | """ | ||||
:param torch.Tensor x: ``[ n_batch*n_word, word_length, char_emb_size]`` input character embeddings | :param torch.Tensor x: ``[ n_batch*n_word, word_length, char_emb_size]`` input character embeddings | ||||
@@ -91,6 +92,6 @@ class LSTMCharEncoder(nn.Module): | |||||
h0 = nn.init.orthogonal_(h0) | h0 = nn.init.orthogonal_(h0) | ||||
c0 = torch.empty(1, batch_size, self.hidden_size) | c0 = torch.empty(1, batch_size, self.hidden_size) | ||||
c0 = nn.init.orthogonal_(c0) | c0 = nn.init.orthogonal_(c0) | ||||
_, hidden = self.lstm(x, (h0, c0)) | _, hidden = self.lstm(x, (h0, c0)) | ||||
return hidden[0].squeeze().unsqueeze(2) | return hidden[0].squeeze().unsqueeze(2) |
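A sketch of the convolutional char encoder above; constructor defaults are the ones shown in this hunk, the import path comes from the updated alias line, and the expected output shape follows the _convolute / unsqueeze logic::

    import torch
    from fastNLP.modules import ConvolutionCharEncoder

    encoder = ConvolutionCharEncoder(char_emb_size=50, feature_maps=(40, 30, 30), kernels=(1, 3, 5))
    chars = torch.randn(8 * 6, 7, 50)   # 8 sentences x 6 words, 7 chars per word, 50-dim char embeddings
    out = encoder(chars)
    print(out.shape)                    # (48, sum(feature_maps), 1) = (48, 100, 1), one vector per word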
@@ -5,9 +5,10 @@ import torch | |||||
import torch.nn as nn | import torch.nn as nn | ||||
import torch.nn.functional as F | import torch.nn.functional as F | ||||
class ConvMaxpool(nn.Module): | class ConvMaxpool(nn.Module): | ||||
""" | """ | ||||
Alias: :class:`fastNLP.modules.ConvMaxpool` :class:`fastNLP.modules.encoder.conv_maxpool.ConvMaxpool` | |||||
Alias: :class:`fastNLP.modules.ConvMaxpool` :class:`fastNLP.modules.encoder.ConvMaxpool` | |||||
A layer that combines Convolution and Max-Pooling. Given an input of batch_size x max_len x input_size, it returns a batch_size x | A layer that combines Convolution and Max-Pooling. Given an input of batch_size x max_len x input_size, it returns a batch_size x | ||||
sum(output_channels) matrix. Internally, the input is first convolved by CNNs, passed through the activation, and then max-pooled over the length (max_len) | sum(output_channels) matrix. Internally, the input is first convolved by CNNs, passed through the activation, and then max-pooled over the length (max_len) | ||||
@@ -18,12 +19,12 @@ class ConvMaxpool(nn.Module): | |||||
:param int,tuple(int) kernel_sizes: kernel size(s) for the output channels. | :param int,tuple(int) kernel_sizes: kernel size(s) for the output channels. | ||||
:param str activation: the convolution output is passed through this activation before max-pooling. Supports relu, sigmoid, tanh | :param str activation: the convolution output is passed through this activation before max-pooling. Supports relu, sigmoid, tanh | ||||
""" | """ | ||||
def __init__(self, in_channels, out_channels, kernel_sizes, activation="relu"): | def __init__(self, in_channels, out_channels, kernel_sizes, activation="relu"): | ||||
super(ConvMaxpool, self).__init__() | super(ConvMaxpool, self).__init__() | ||||
for kernel_size in kernel_sizes: | for kernel_size in kernel_sizes: | ||||
assert kernel_size%2==1, "kernel size has to be odd numbers." | |||||
assert kernel_size % 2 == 1, "kernel size has to be odd numbers." | |||||
# convolution | # convolution | ||||
if isinstance(kernel_sizes, (list, tuple, int)): | if isinstance(kernel_sizes, (list, tuple, int)): | ||||
@@ -36,22 +37,22 @@ class ConvMaxpool(nn.Module): | |||||
" of kernel_sizes." | " of kernel_sizes." | ||||
else: | else: | ||||
raise ValueError("The type of out_channels and kernel_sizes should be the same.") | raise ValueError("The type of out_channels and kernel_sizes should be the same.") | ||||
self.convs = nn.ModuleList([nn.Conv1d( | self.convs = nn.ModuleList([nn.Conv1d( | ||||
in_channels=in_channels, | in_channels=in_channels, | ||||
out_channels=oc, | out_channels=oc, | ||||
kernel_size=ks, | kernel_size=ks, | ||||
stride=1, | stride=1, | ||||
padding=ks//2, | |||||
padding=ks // 2, | |||||
dilation=1, | dilation=1, | ||||
groups=1, | groups=1, | ||||
bias=None) | bias=None) | ||||
for oc, ks in zip(out_channels, kernel_sizes)]) | for oc, ks in zip(out_channels, kernel_sizes)]) | ||||
else: | else: | ||||
raise Exception( | raise Exception( | ||||
'Incorrect kernel sizes: should be list, tuple or int') | 'Incorrect kernel sizes: should be list, tuple or int') | ||||
# activation function | # activation function | ||||
if activation == 'relu': | if activation == 'relu': | ||||
self.activation = F.relu | self.activation = F.relu | ||||
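A usage sketch for ConvMaxpool: one odd-sized 1-d convolution per entry of out_channels, followed by max-pooling over the length dimension. The import path comes from the alias line; the forward signature is assumed from the released API::

    import torch
    from fastNLP.modules import ConvMaxpool

    conv_pool = ConvMaxpool(in_channels=128, out_channels=[30, 40, 50], kernel_sizes=[1, 3, 5])
    x = torch.randn(4, 20, 128)     # (batch_size, max_len, input_size)
    out = conv_pool(x)
    print(out.shape)                # (4, 120): sum(out_channels) features per sentence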
@@ -10,9 +10,10 @@ import torch | |||||
import torch.nn as nn | import torch.nn as nn | ||||
import torch.nn.utils.rnn as rnn | import torch.nn.utils.rnn as rnn | ||||
class LSTM(nn.Module): | class LSTM(nn.Module): | ||||
""" | """ | ||||
Alias: :class:`fastNLP.modules.LSTM` :class:`fastNLP.modules.encoder.lstm.LSTM` | |||||
Alias: :class:`fastNLP.modules.LSTM` :class:`fastNLP.modules.encoder.LSTM` | |||||
LSTM module, a light wrapper around the PyTorch LSTM. When seq_len is provided, pack_padded_sequence is used automatically; by default the forget gate bias is initialized | LSTM module, a light wrapper around the PyTorch LSTM. When seq_len is provided, pack_padded_sequence is used automatically; by default the forget gate bias is initialized | ||||
to 1, and it handles the issues of using an LSTM inside DataParallel. | to 1, and it handles the issues of using an LSTM inside DataParallel. | ||||
@@ -26,7 +27,7 @@ class LSTM(nn.Module): | |||||
:(batch, seq, feature). Default: ``False`` | :(batch, seq, feature). Default: ``False`` | ||||
:param bias: if ``False``, the model will not use bias. Default: ``True`` | :param bias: if ``False``, the model will not use bias. Default: ``True`` | ||||
""" | """ | ||||
def __init__(self, input_size, hidden_size=100, num_layers=1, dropout=0.0, batch_first=True, | def __init__(self, input_size, hidden_size=100, num_layers=1, dropout=0.0, batch_first=True, | ||||
bidirectional=False, bias=True): | bidirectional=False, bias=True): | ||||
super(LSTM, self).__init__() | super(LSTM, self).__init__() | ||||
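A sketch of the LSTM wrapper; passing seq_len triggers the pack_padded_sequence path mentioned in the docstring. The seq_len keyword and the (output, (h, c)) return structure are assumptions based on the released API::

    import torch
    from fastNLP.modules import LSTM

    lstm = LSTM(input_size=32, hidden_size=64, num_layers=1, batch_first=True, bidirectional=True)
    x = torch.randn(3, 10, 32)
    seq_len = torch.tensor([10, 7, 4])          # true lengths of the padded sequences
    output, (h, c) = lstm(x, seq_len=seq_len)
    print(output.shape)                         # (3, 10, 128) for a bidirectional model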
@@ -10,7 +10,7 @@ import torch.nn as nn | |||||
class MaxPool(nn.Module): | class MaxPool(nn.Module): | ||||
""" | """ | ||||
Alias: :class:`fastNLP.modules.MaxPool` :class:`fastNLP.modules.encoder.pooling.MaxPool` | |||||
Alias: :class:`fastNLP.modules.MaxPool` :class:`fastNLP.modules.encoder.MaxPool` | |||||
Max-pooling module. | Max-pooling module. | ||||
@@ -21,9 +21,9 @@ class MaxPool(nn.Module): | |||||
:param kernel_size: window size for max pooling; defaults to the tensor's last k dimensions, where k is ``dimension`` | :param kernel_size: window size for max pooling; defaults to the tensor's last k dimensions, where k is ``dimension`` | ||||
:param ceil_mode: | :param ceil_mode: | ||||
""" | """ | ||||
def __init__(self, stride=None, padding=0, dilation=1, dimension=1, kernel_size=None, ceil_mode=False): | def __init__(self, stride=None, padding=0, dilation=1, dimension=1, kernel_size=None, ceil_mode=False): | ||||
super(MaxPool, self).__init__() | super(MaxPool, self).__init__() | ||||
assert (1 <= dimension) and (dimension <= 3) | assert (1 <= dimension) and (dimension <= 3) | ||||
self.dimension = dimension | self.dimension = dimension | ||||
@@ -32,7 +32,7 @@ class MaxPool(nn.Module): | |||||
self.dilation = dilation | self.dilation = dilation | ||||
self.kernel_size = kernel_size | self.kernel_size = kernel_size | ||||
self.ceil_mode = ceil_mode | self.ceil_mode = ceil_mode | ||||
def forward(self, x): | def forward(self, x): | ||||
if self.dimension == 1: | if self.dimension == 1: | ||||
pooling = nn.MaxPool1d( | pooling = nn.MaxPool1d( | ||||
@@ -59,15 +59,15 @@ class MaxPool(nn.Module): | |||||
class MaxPoolWithMask(nn.Module): | class MaxPoolWithMask(nn.Module): | ||||
""" | """ | ||||
Alias: :class:`fastNLP.modules.MaxPoolWithMask` :class:`fastNLP.modules.encoder.pooling.MaxPoolWithMask` | |||||
Alias: :class:`fastNLP.modules.MaxPoolWithMask` :class:`fastNLP.modules.encoder.MaxPoolWithMask` | |||||
Max pooling with a mask matrix; positions where the mask value is 0 are ignored during max-pooling. | Max pooling with a mask matrix; positions where the mask value is 0 are ignored during max-pooling. | ||||
""" | """ | ||||
def __init__(self): | def __init__(self): | ||||
super(MaxPoolWithMask, self).__init__() | super(MaxPoolWithMask, self).__init__() | ||||
self.inf = 10e12 | self.inf = 10e12 | ||||
def forward(self, tensor, mask, dim=1): | def forward(self, tensor, mask, dim=1): | ||||
""" | """ | ||||
:param torch.FloatTensor tensor: [batch_size, seq_len, channels] the input tensor | :param torch.FloatTensor tensor: [batch_size, seq_len, channels] the input tensor | ||||
@@ -82,11 +82,11 @@ class MaxPoolWithMask(nn.Module): | |||||
class KMaxPool(nn.Module): | class KMaxPool(nn.Module): | ||||
"""K max-pooling module.""" | """K max-pooling module.""" | ||||
def __init__(self, k=1): | def __init__(self, k=1): | ||||
super(KMaxPool, self).__init__() | super(KMaxPool, self).__init__() | ||||
self.k = k | self.k = k | ||||
def forward(self, x): | def forward(self, x): | ||||
""" | """ | ||||
:param torch.Tensor x: [N, C, L] the input tensor | :param torch.Tensor x: [N, C, L] the input tensor | ||||
@@ -99,16 +99,16 @@ class KMaxPool(nn.Module): | |||||
class AvgPool(nn.Module): | class AvgPool(nn.Module): | ||||
""" | """ | ||||
Alias: :class:`fastNLP.modules.AvgPool` :class:`fastNLP.modules.encoder.pooling.AvgPool` | |||||
Alias: :class:`fastNLP.modules.AvgPool` :class:`fastNLP.modules.encoder.AvgPool` | |||||
Given an input of shape [batch_size, max_len, hidden_size], performs avg pooling over the last dimension. The output is [batch_size, hidden_size] | Given an input of shape [batch_size, max_len, hidden_size], performs avg pooling over the last dimension. The output is [batch_size, hidden_size] | ||||
""" | """ | ||||
def __init__(self, stride=None, padding=0): | def __init__(self, stride=None, padding=0): | ||||
super(AvgPool, self).__init__() | super(AvgPool, self).__init__() | ||||
self.stride = stride | self.stride = stride | ||||
self.padding = padding | self.padding = padding | ||||
def forward(self, x): | def forward(self, x): | ||||
""" | """ | ||||
:param torch.Tensor x: [N, C, L] the input tensor | :param torch.Tensor x: [N, C, L] the input tensor | ||||
@@ -126,16 +126,16 @@ class AvgPool(nn.Module): | |||||
class AvgPoolWithMask(nn.Module): | class AvgPoolWithMask(nn.Module): | ||||
""" | """ | ||||
Alias: :class:`fastNLP.modules.AvgPoolWithMask` :class:`fastNLP.modules.encoder.pooling.AvgPoolWithMask` | |||||
Alias: :class:`fastNLP.modules.AvgPoolWithMask` :class:`fastNLP.modules.encoder.AvgPoolWithMask` | |||||
Given an input of shape [batch_size, max_len, hidden_size], performs avg pooling over the last dimension. The output is [batch_size, hidden_size]; the pooling | Given an input of shape [batch_size, max_len, hidden_size], performs avg pooling over the last dimension. The output is [batch_size, hidden_size]; the pooling | ||||
only takes positions where the mask is 1 into account | only takes positions where the mask is 1 into account | ||||
""" | """ | ||||
def __init__(self): | def __init__(self): | ||||
super(AvgPoolWithMask, self).__init__() | super(AvgPoolWithMask, self).__init__() | ||||
self.inf = 10e12 | self.inf = 10e12 | ||||
def forward(self, tensor, mask, dim=1): | def forward(self, tensor, mask, dim=1): | ||||
""" | """ | ||||
:param torch.FloatTensor tensor: [batch_size, seq_len, channels] the input tensor | :param torch.FloatTensor tensor: [batch_size, seq_len, channels] the input tensor | ||||
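A sketch of the masked pooling modules above; the mask marks real tokens with 1, and masked positions are excluded from the max / average. Imports follow the alias lines::

    import torch
    from fastNLP.modules import MaxPoolWithMask, AvgPoolWithMask

    tensor = torch.randn(2, 5, 16)                              # (batch_size, seq_len, channels)
    mask = torch.tensor([[1, 1, 1, 0, 0], [1, 1, 1, 1, 1]])
    max_pooled = MaxPoolWithMask()(tensor, mask)                # padded positions never win the max
    avg_pooled = AvgPoolWithMask()(tensor, mask)                # average only over positions with mask == 1
    print(max_pooled.shape, avg_pooled.shape)                   # both (2, 16)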
@@ -13,7 +13,7 @@ from torch.nn import functional as F | |||||
class StarTransformer(nn.Module): | class StarTransformer(nn.Module): | ||||
""" | """ | ||||
Alias: :class:`fastNLP.modules.StarTransformer` :class:`fastNLP.modules.encoder.star_transformer.StarTransformer` | |||||
Alias: :class:`fastNLP.modules.StarTransformer` :class:`fastNLP.modules.encoder.StarTransformer` | |||||
The encoder part of the Star-Transformer. Takes a 3-d text input and returns a text encoding of the same length | The encoder part of the Star-Transformer. Takes a 3-d text input and returns a text encoding of the same length | ||||
@@ -29,11 +29,11 @@ class StarTransformer(nn.Module): | |||||
the model adds a position embedding to the input sequence. | the model adds a position embedding to the input sequence. | ||||
If `None`, the position-embedding step is skipped. Default: `None` | If `None`, the position-embedding step is skipped. Default: `None` | ||||
""" | """ | ||||
def __init__(self, hidden_size, num_layers, num_head, head_dim, dropout=0.1, max_len=None): | def __init__(self, hidden_size, num_layers, num_head, head_dim, dropout=0.1, max_len=None): | ||||
super(StarTransformer, self).__init__() | super(StarTransformer, self).__init__() | ||||
self.iters = num_layers | self.iters = num_layers | ||||
self.norm = nn.ModuleList([nn.LayerNorm(hidden_size, eps=1e-6) for _ in range(self.iters)]) | self.norm = nn.ModuleList([nn.LayerNorm(hidden_size, eps=1e-6) for _ in range(self.iters)]) | ||||
# self.emb_fc = nn.Conv2d(hidden_size, hidden_size, 1) | # self.emb_fc = nn.Conv2d(hidden_size, hidden_size, 1) | ||||
self.emb_drop = nn.Dropout(dropout) | self.emb_drop = nn.Dropout(dropout) | ||||
@@ -43,12 +43,12 @@ class StarTransformer(nn.Module): | |||||
self.star_att = nn.ModuleList( | self.star_att = nn.ModuleList( | ||||
[_MSA2(hidden_size, nhead=num_head, head_dim=head_dim, dropout=0.0) | [_MSA2(hidden_size, nhead=num_head, head_dim=head_dim, dropout=0.0) | ||||
for _ in range(self.iters)]) | for _ in range(self.iters)]) | ||||
if max_len is not None: | if max_len is not None: | ||||
self.pos_emb = nn.Embedding(max_len, hidden_size) | self.pos_emb = nn.Embedding(max_len, hidden_size) | ||||
else: | else: | ||||
self.pos_emb = None | self.pos_emb = None | ||||
def forward(self, data, mask): | def forward(self, data, mask): | ||||
""" | """ | ||||
:param FloatTensor data: [batch, length, hidden] the input sequence | :param FloatTensor data: [batch, length, hidden] the input sequence | ||||
@@ -58,15 +58,15 @@ class StarTransformer(nn.Module): | |||||
[batch, hidden] the global relay node; see the paper for details | [batch, hidden] the global relay node; see the paper for details | ||||
""" | """ | ||||
def norm_func(f, x): | def norm_func(f, x): | ||||
# B, H, L, 1 | # B, H, L, 1 | ||||
return f(x.permute(0, 2, 3, 1)).permute(0, 3, 1, 2) | return f(x.permute(0, 2, 3, 1)).permute(0, 3, 1, 2) | ||||
B, L, H = data.size() | B, L, H = data.size() | ||||
mask = (mask == 0) # flip the mask for masked_fill_ | mask = (mask == 0) # flip the mask for masked_fill_ | ||||
smask = torch.cat([torch.zeros(B, 1, ).byte().to(mask), mask], 1) | smask = torch.cat([torch.zeros(B, 1, ).byte().to(mask), mask], 1) | ||||
embs = data.permute(0, 2, 1)[:, :, :, None] # B H L 1 | embs = data.permute(0, 2, 1)[:, :, :, None] # B H L 1 | ||||
if self.pos_emb and False: | if self.pos_emb and False: | ||||
P = self.pos_emb(torch.arange(L, dtype=torch.long, device=embs.device) \ | P = self.pos_emb(torch.arange(L, dtype=torch.long, device=embs.device) \ | ||||
@@ -80,13 +80,13 @@ class StarTransformer(nn.Module): | |||||
for i in range(self.iters): | for i in range(self.iters): | ||||
ax = torch.cat([r_embs, relay.expand(B, H, 1, L)], 2) | ax = torch.cat([r_embs, relay.expand(B, H, 1, L)], 2) | ||||
nodes = F.leaky_relu(self.ring_att[i](norm_func(self.norm[i], nodes), ax=ax)) | nodes = F.leaky_relu(self.ring_att[i](norm_func(self.norm[i], nodes), ax=ax)) | ||||
#nodes = F.leaky_relu(self.ring_att[i](nodes, ax=ax)) | |||||
# nodes = F.leaky_relu(self.ring_att[i](nodes, ax=ax)) | |||||
relay = F.leaky_relu(self.star_att[i](relay, torch.cat([relay, nodes], 2), smask)) | relay = F.leaky_relu(self.star_att[i](relay, torch.cat([relay, nodes], 2), smask)) | ||||
nodes = nodes.masked_fill_(ex_mask, 0) | nodes = nodes.masked_fill_(ex_mask, 0) | ||||
nodes = nodes.view(B, H, L).permute(0, 2, 1) | nodes = nodes.view(B, H, L).permute(0, 2, 1) | ||||
return nodes, relay.view(B, H) | return nodes, relay.view(B, H) | ||||
@@ -99,19 +99,19 @@ class _MSA1(nn.Module): | |||||
self.WK = nn.Conv2d(nhid, nhead * head_dim, 1) | self.WK = nn.Conv2d(nhid, nhead * head_dim, 1) | ||||
self.WV = nn.Conv2d(nhid, nhead * head_dim, 1) | self.WV = nn.Conv2d(nhid, nhead * head_dim, 1) | ||||
self.WO = nn.Conv2d(nhead * head_dim, nhid, 1) | self.WO = nn.Conv2d(nhead * head_dim, nhid, 1) | ||||
self.drop = nn.Dropout(dropout) | self.drop = nn.Dropout(dropout) | ||||
# print('NUM_HEAD', nhead, 'DIM_HEAD', head_dim) | # print('NUM_HEAD', nhead, 'DIM_HEAD', head_dim) | ||||
self.nhid, self.nhead, self.head_dim, self.unfold_size = nhid, nhead, head_dim, 3 | self.nhid, self.nhead, self.head_dim, self.unfold_size = nhid, nhead, head_dim, 3 | ||||
def forward(self, x, ax=None): | def forward(self, x, ax=None): | ||||
# x: B, H, L, 1, ax : B, H, X, L append features | # x: B, H, L, 1, ax : B, H, X, L append features | ||||
nhid, nhead, head_dim, unfold_size = self.nhid, self.nhead, self.head_dim, self.unfold_size | nhid, nhead, head_dim, unfold_size = self.nhid, self.nhead, self.head_dim, self.unfold_size | ||||
B, H, L, _ = x.shape | B, H, L, _ = x.shape | ||||
q, k, v = self.WQ(x), self.WK(x), self.WV(x) # x: (B,H,L,1) | q, k, v = self.WQ(x), self.WK(x), self.WV(x) # x: (B,H,L,1) | ||||
if ax is not None: | if ax is not None: | ||||
aL = ax.shape[2] | aL = ax.shape[2] | ||||
ak = self.WK(ax).view(B, nhead, head_dim, aL, L) | ak = self.WK(ax).view(B, nhead, head_dim, aL, L) | ||||
@@ -124,12 +124,12 @@ class _MSA1(nn.Module): | |||||
if ax is not None: | if ax is not None: | ||||
k = torch.cat([k, ak], 3) | k = torch.cat([k, ak], 3) | ||||
v = torch.cat([v, av], 3) | v = torch.cat([v, av], 3) | ||||
alphas = self.drop(F.softmax((q * k).sum(2, keepdim=True) / NP.sqrt(head_dim), 3)) # B N L 1 U | alphas = self.drop(F.softmax((q * k).sum(2, keepdim=True) / NP.sqrt(head_dim), 3)) # B N L 1 U | ||||
att = (alphas * v).sum(3).view(B, nhead * head_dim, L, 1) | att = (alphas * v).sum(3).view(B, nhead * head_dim, L, 1) | ||||
ret = self.WO(att) | ret = self.WO(att) | ||||
return ret | return ret | ||||
@@ -141,19 +141,19 @@ class _MSA2(nn.Module): | |||||
self.WK = nn.Conv2d(nhid, nhead * head_dim, 1) | self.WK = nn.Conv2d(nhid, nhead * head_dim, 1) | ||||
self.WV = nn.Conv2d(nhid, nhead * head_dim, 1) | self.WV = nn.Conv2d(nhid, nhead * head_dim, 1) | ||||
self.WO = nn.Conv2d(nhead * head_dim, nhid, 1) | self.WO = nn.Conv2d(nhead * head_dim, nhid, 1) | ||||
self.drop = nn.Dropout(dropout) | self.drop = nn.Dropout(dropout) | ||||
# print('NUM_HEAD', nhead, 'DIM_HEAD', head_dim) | # print('NUM_HEAD', nhead, 'DIM_HEAD', head_dim) | ||||
self.nhid, self.nhead, self.head_dim, self.unfold_size = nhid, nhead, head_dim, 3 | self.nhid, self.nhead, self.head_dim, self.unfold_size = nhid, nhead, head_dim, 3 | ||||
def forward(self, x, y, mask=None): | def forward(self, x, y, mask=None): | ||||
# x: B, H, 1, 1, 1 y: B H L 1 | # x: B, H, 1, 1, 1 y: B H L 1 | ||||
nhid, nhead, head_dim, unfold_size = self.nhid, self.nhead, self.head_dim, self.unfold_size | nhid, nhead, head_dim, unfold_size = self.nhid, self.nhead, self.head_dim, self.unfold_size | ||||
B, H, L, _ = y.shape | B, H, L, _ = y.shape | ||||
q, k, v = self.WQ(x), self.WK(y), self.WV(y) | q, k, v = self.WQ(x), self.WK(y), self.WV(y) | ||||
q = q.view(B, nhead, 1, head_dim) # B, H, 1, 1 -> B, N, 1, h | q = q.view(B, nhead, 1, head_dim) # B, H, 1, 1 -> B, N, 1, h | ||||
k = k.view(B, nhead, head_dim, L) # B, H, L, 1 -> B, N, h, L | k = k.view(B, nhead, head_dim, L) # B, H, L, 1 -> B, N, h, L | ||||
v = v.view(B, nhead, head_dim, L).permute(0, 1, 3, 2) # B, H, L, 1 -> B, N, L, h | v = v.view(B, nhead, head_dim, L).permute(0, 1, 3, 2) # B, H, L, 1 -> B, N, L, h | ||||
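A usage sketch of the StarTransformer encoder; constructor arguments follow the __init__ shown above, and the two return values are the per-token states and the relay node from forward. The parts of forward not shown in this hunk are assumed to match the released implementation::

    import torch
    from fastNLP.modules import StarTransformer

    encoder = StarTransformer(hidden_size=64, num_layers=2, num_head=4, head_dim=16, dropout=0.1)
    data = torch.randn(2, 9, 64)        # (batch, length, hidden)
    mask = torch.ones(2, 9).byte()
    mask[1, 6:] = 0                     # second sequence has only 6 real tokens
    nodes, relay = encoder(data, mask)
    print(nodes.shape, relay.shape)     # (2, 9, 64) and (2, 64)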
@@ -9,7 +9,7 @@ from ..dropout import TimestepDropout | |||||
class TransformerEncoder(nn.Module): | class TransformerEncoder(nn.Module): | ||||
""" | """ | ||||
Alias: :class:`fastNLP.modules.TransformerEncoder` :class:`fastNLP.modules.encoder.transformer.TransformerEncoder` | |||||
Alias: :class:`fastNLP.modules.TransformerEncoder` :class:`fastNLP.modules.encoder.TransformerEncoder` | |||||
The transformer encoder module, without the embedding layer | The transformer encoder module, without the embedding layer | ||||
@@ -22,7 +22,7 @@ class TransformerEncoder(nn.Module): | |||||
:param int num_head: number of heads. | :param int num_head: number of heads. | ||||
:param float dropout: dropout probability. Default: 0.1 | :param float dropout: dropout probability. Default: 0.1 | ||||
""" | """ | ||||
class SubLayer(nn.Module): | class SubLayer(nn.Module): | ||||
def __init__(self, model_size, inner_size, key_size, value_size, num_head, dropout=0.1): | def __init__(self, model_size, inner_size, key_size, value_size, num_head, dropout=0.1): | ||||
super(TransformerEncoder.SubLayer, self).__init__() | super(TransformerEncoder.SubLayer, self).__init__() | ||||
@@ -33,7 +33,7 @@ class TransformerEncoder(nn.Module): | |||||
nn.Linear(inner_size, model_size), | nn.Linear(inner_size, model_size), | ||||
TimestepDropout(dropout), ) | TimestepDropout(dropout), ) | ||||
self.norm2 = nn.LayerNorm(model_size) | self.norm2 = nn.LayerNorm(model_size) | ||||
def forward(self, input, seq_mask=None, atte_mask_out=None): | def forward(self, input, seq_mask=None, atte_mask_out=None): | ||||
""" | """ | ||||
@@ -48,11 +48,11 @@ class TransformerEncoder(nn.Module): | |||||
output = self.norm2(output + norm_atte) | output = self.norm2(output + norm_atte) | ||||
output *= seq_mask | output *= seq_mask | ||||
return output | return output | ||||
def __init__(self, num_layers, **kargs): | def __init__(self, num_layers, **kargs): | ||||
super(TransformerEncoder, self).__init__() | super(TransformerEncoder, self).__init__() | ||||
self.layers = nn.ModuleList([self.SubLayer(**kargs) for _ in range(num_layers)]) | self.layers = nn.ModuleList([self.SubLayer(**kargs) for _ in range(num_layers)]) | ||||
def forward(self, x, seq_mask=None): | def forward(self, x, seq_mask=None): | ||||
""" | """ | ||||
:param x: [batch, seq_len, model_size] the input sequence | :param x: [batch, seq_len, model_size] the input sequence | ||||
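TransformerEncoder builds num_layers copies of the SubLayer shown above, so every SubLayer constructor argument is passed through **kargs. A minimal sketch; the internal reshaping of seq_mask inside forward is assumed to match the released implementation::

    import torch
    from fastNLP.modules import TransformerEncoder

    encoder = TransformerEncoder(num_layers=2, model_size=64, inner_size=128,
                                 key_size=16, value_size=16, num_head=4, dropout=0.1)
    x = torch.randn(2, 9, 64)           # (batch, seq_len, model_size)
    seq_mask = torch.ones(2, 9)         # 1 for real tokens, 0 for padding
    out = encoder(x, seq_mask=seq_mask)
    print(out.shape)                    # (2, 9, 64)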
@@ -28,14 +28,14 @@ class VarRnnCellWrapper(nn.Module): | |||||
""" | """ | ||||
Wrapper for normal RNN Cells, make it support variational dropout | Wrapper for normal RNN Cells, make it support variational dropout | ||||
""" | """ | ||||
def __init__(self, cell, hidden_size, input_p, hidden_p): | def __init__(self, cell, hidden_size, input_p, hidden_p): | ||||
super(VarRnnCellWrapper, self).__init__() | super(VarRnnCellWrapper, self).__init__() | ||||
self.cell = cell | self.cell = cell | ||||
self.hidden_size = hidden_size | self.hidden_size = hidden_size | ||||
self.input_p = input_p | self.input_p = input_p | ||||
self.hidden_p = hidden_p | self.hidden_p = hidden_p | ||||
def forward(self, input_x, hidden, mask_x, mask_h, is_reversed=False): | def forward(self, input_x, hidden, mask_x, mask_h, is_reversed=False): | ||||
""" | """ | ||||
:param PackedSequence input_x: [seq_len, batch_size, input_size] | :param PackedSequence input_x: [seq_len, batch_size, input_size] | ||||
@@ -47,13 +47,13 @@ class VarRnnCellWrapper(nn.Module): | |||||
hidden: for LSTM, tuple of (h_n, c_n), [batch_size, hidden_size] | hidden: for LSTM, tuple of (h_n, c_n), [batch_size, hidden_size] | ||||
for other RNN, h_n, [batch_size, hidden_size] | for other RNN, h_n, [batch_size, hidden_size] | ||||
""" | """ | ||||
def get_hi(hi, h0, size): | def get_hi(hi, h0, size): | ||||
h0_size = size - hi.size(0) | h0_size = size - hi.size(0) | ||||
if h0_size > 0: | if h0_size > 0: | ||||
return torch.cat([hi, h0[:h0_size]], dim=0) | return torch.cat([hi, h0[:h0_size]], dim=0) | ||||
return hi[:size] | return hi[:size] | ||||
is_lstm = isinstance(hidden, tuple) | is_lstm = isinstance(hidden, tuple) | ||||
input, batch_sizes = input_x.data, input_x.batch_sizes | input, batch_sizes = input_x.data, input_x.batch_sizes | ||||
output = [] | output = [] | ||||
@@ -64,7 +64,7 @@ class VarRnnCellWrapper(nn.Module): | |||||
else: | else: | ||||
batch_iter = batch_sizes | batch_iter = batch_sizes | ||||
idx = 0 | idx = 0 | ||||
if is_lstm: | if is_lstm: | ||||
hn = (hidden[0].clone(), hidden[1].clone()) | hn = (hidden[0].clone(), hidden[1].clone()) | ||||
else: | else: | ||||
@@ -91,7 +91,7 @@ class VarRnnCellWrapper(nn.Module): | |||||
hi = cell(input_i, hi) | hi = cell(input_i, hi) | ||||
hn[:size] = hi | hn[:size] = hi | ||||
output.append(hi) | output.append(hi) | ||||
if is_reversed: | if is_reversed: | ||||
output = list(reversed(output)) | output = list(reversed(output)) | ||||
output = torch.cat(output, dim=0) | output = torch.cat(output, dim=0) | ||||
@@ -117,7 +117,7 @@ class VarRNNBase(nn.Module): | |||||
:param hidden_dropout: dropout probability for each hidden state. Default: 0 | :param hidden_dropout: dropout probability for each hidden state. Default: 0 | ||||
:param bidirectional: if ``True``, use a bidirectional RNN. Default: ``False`` | :param bidirectional: if ``True``, use a bidirectional RNN. Default: ``False`` | ||||
""" | """ | ||||
def __init__(self, mode, Cell, input_size, hidden_size, num_layers=1, | def __init__(self, mode, Cell, input_size, hidden_size, num_layers=1, | ||||
bias=True, batch_first=False, | bias=True, batch_first=False, | ||||
input_dropout=0, hidden_dropout=0, bidirectional=False): | input_dropout=0, hidden_dropout=0, bidirectional=False): | ||||
@@ -141,7 +141,7 @@ class VarRNNBase(nn.Module): | |||||
cell, self.hidden_size, input_dropout, hidden_dropout)) | cell, self.hidden_size, input_dropout, hidden_dropout)) | ||||
initial_parameter(self) | initial_parameter(self) | ||||
self.is_lstm = (self.mode == "LSTM") | self.is_lstm = (self.mode == "LSTM") | ||||
def _forward_one(self, n_layer, n_direction, input, hx, mask_x, mask_h): | def _forward_one(self, n_layer, n_direction, input, hx, mask_x, mask_h): | ||||
is_lstm = self.is_lstm | is_lstm = self.is_lstm | ||||
idx = self.num_directions * n_layer + n_direction | idx = self.num_directions * n_layer + n_direction | ||||
@@ -150,7 +150,7 @@ class VarRNNBase(nn.Module): | |||||
output_x, hidden_x = cell( | output_x, hidden_x = cell( | ||||
input, hi, mask_x, mask_h, is_reversed=(n_direction == 1)) | input, hi, mask_x, mask_h, is_reversed=(n_direction == 1)) | ||||
return output_x, hidden_x | return output_x, hidden_x | ||||
def forward(self, x, hx=None): | def forward(self, x, hx=None): | ||||
""" | """ | ||||
@@ -170,13 +170,13 @@ class VarRNNBase(nn.Module): | |||||
else: | else: | ||||
max_batch_size = int(x.batch_sizes[0]) | max_batch_size = int(x.batch_sizes[0]) | ||||
x, batch_sizes = x.data, x.batch_sizes | x, batch_sizes = x.data, x.batch_sizes | ||||
if hx is None: | if hx is None: | ||||
hx = x.new_zeros(self.num_layers * self.num_directions, | hx = x.new_zeros(self.num_layers * self.num_directions, | ||||
max_batch_size, self.hidden_size, requires_grad=True) | max_batch_size, self.hidden_size, requires_grad=True) | ||||
if is_lstm: | if is_lstm: | ||||
hx = (hx, hx.new_zeros(hx.size(), requires_grad=True)) | hx = (hx, hx.new_zeros(hx.size(), requires_grad=True)) | ||||
mask_x = x.new_ones((max_batch_size, self.input_size)) | mask_x = x.new_ones((max_batch_size, self.input_size)) | ||||
mask_out = x.new_ones( | mask_out = x.new_ones( | ||||
(max_batch_size, self.hidden_size * self.num_directions)) | (max_batch_size, self.hidden_size * self.num_directions)) | ||||
@@ -185,7 +185,7 @@ class VarRNNBase(nn.Module): | |||||
training=self.training, inplace=True) | training=self.training, inplace=True) | ||||
nn.functional.dropout(mask_out, p=self.hidden_dropout, | nn.functional.dropout(mask_out, p=self.hidden_dropout, | ||||
training=self.training, inplace=True) | training=self.training, inplace=True) | ||||
hidden = x.new_zeros( | hidden = x.new_zeros( | ||||
(self.num_layers * self.num_directions, max_batch_size, self.hidden_size)) | (self.num_layers * self.num_directions, max_batch_size, self.hidden_size)) | ||||
if is_lstm: | if is_lstm: | ||||
@@ -207,22 +207,22 @@ class VarRNNBase(nn.Module): | |||||
else: | else: | ||||
hidden[idx] = hidden_x | hidden[idx] = hidden_x | ||||
x = torch.cat(output_list, dim=-1) | x = torch.cat(output_list, dim=-1) | ||||
if is_lstm: | if is_lstm: | ||||
hidden = (hidden, cellstate) | hidden = (hidden, cellstate) | ||||
if is_packed: | if is_packed: | ||||
output = PackedSequence(x, batch_sizes) | output = PackedSequence(x, batch_sizes) | ||||
else: | else: | ||||
x = PackedSequence(x, batch_sizes) | x = PackedSequence(x, batch_sizes) | ||||
output, _ = pad_packed_sequence(x, batch_first=self.batch_first) | output, _ = pad_packed_sequence(x, batch_first=self.batch_first) | ||||
return output, hidden | return output, hidden | ||||
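The forward pass above implements the variational-dropout trick: mask_x and mask_out are sampled once per batch (via the in-place nn.functional.dropout calls) and then reused at every time step, instead of drawing a fresh mask at each step as ordinary dropout would. Below is a minimal, illustrative-only sketch of that idea using a plain nn.RNNCell rather than the cell wrapper used here; the function name and sizes are hypothetical, not part of this diff:

    import torch
    import torch.nn as nn

    def var_dropout_unroll(cell, x, h, p=0.3, training=True):
        # x: (seq_len, batch, input_size); cell: e.g. nn.RNNCell(input_size, hidden_size)
        # One Bernoulli mask per sequence, shared across all time steps.
        mask = nn.functional.dropout(torch.ones_like(x[0]), p=p, training=training)
        outputs = []
        for x_t in x:
            h = cell(x_t * mask, h)   # the same mask is applied at every step
            outputs.append(h)
        return torch.stack(outputs), h

    # Example call (hypothetical sizes):
    # out, h = var_dropout_unroll(nn.RNNCell(10, 20), torch.randn(5, 3, 10), torch.zeros(3, 20))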
class VarLSTM(VarRNNBase): | class VarLSTM(VarRNNBase): | ||||
""" | """ | ||||
Alias: :class:`fastNLP.modules.VarLSTM` :class:`fastNLP.modules.encoder.variational_rnn.VarLSTM` | |||||
Alias: :class:`fastNLP.modules.VarLSTM` :class:`fastNLP.modules.encoder.VarLSTM` | |||||
Variational Dropout LSTM. | Variational Dropout LSTM. | ||||
@@ -236,18 +236,18 @@ class VarLSTM(VarRNNBase): | |||||
:param hidden_dropout: dropout probability applied to each hidden state. Default: 0 | :param hidden_dropout: dropout probability applied to each hidden state. Default: 0 | ||||
:param bidirectional: if ``True``, use a bidirectional LSTM. Default: ``False`` | :param bidirectional: if ``True``, use a bidirectional LSTM. Default: ``False`` | ||||
""" | """ | ||||
def __init__(self, *args, **kwargs): | def __init__(self, *args, **kwargs): | ||||
super(VarLSTM, self).__init__( | super(VarLSTM, self).__init__( | ||||
mode="LSTM", Cell=nn.LSTMCell, *args, **kwargs) | mode="LSTM", Cell=nn.LSTMCell, *args, **kwargs) | ||||
def forward(self, x, hx=None): | def forward(self, x, hx=None): | ||||
return super(VarLSTM, self).forward(x, hx) | return super(VarLSTM, self).forward(x, hx) | ||||
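A hedged usage sketch for the VarLSTM defined above: the constructor keywords mirror the parameters listed in its docstring, and the import path follows the new alias (fastNLP.modules.VarLSTM); the tensor sizes and dropout values are illustrative assumptions, not values from this diff:

    import torch
    from fastNLP.modules import VarLSTM

    lstm = VarLSTM(input_size=10, hidden_size=20, num_layers=2,
                   batch_first=True, input_dropout=0.3, hidden_dropout=0.3,
                   bidirectional=True)
    x = torch.randn(4, 7, 10)          # (batch, seq_len, input_size) since batch_first=True
    output, (h_n, c_n) = lstm(x)       # output: (4, 7, 40); h_n, c_n: (4, 4, 20)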
class VarRNN(VarRNNBase): | class VarRNN(VarRNNBase): | ||||
""" | """ | ||||
Alias: :class:`fastNLP.modules.VarRNN` :class:`fastNLP.modules.encoder.variational_rnn.VarRNN` | |||||
Alias: :class:`fastNLP.modules.VarRNN` :class:`fastNLP.modules.encoder.VarRNN` | |||||
Variational Dropout RNN. | Variational Dropout RNN. | ||||
@@ -261,18 +261,18 @@ class VarRNN(VarRNNBase): | |||||
:param hidden_dropout: dropout probability applied to each hidden state. Default: 0 | :param hidden_dropout: dropout probability applied to each hidden state. Default: 0 | ||||
:param bidirectional: if ``True``, use a bidirectional RNN. Default: ``False`` | :param bidirectional: if ``True``, use a bidirectional RNN. Default: ``False`` | ||||
""" | """ | ||||
def __init__(self, *args, **kwargs): | def __init__(self, *args, **kwargs): | ||||
super(VarRNN, self).__init__( | super(VarRNN, self).__init__( | ||||
mode="RNN", Cell=nn.RNNCell, *args, **kwargs) | mode="RNN", Cell=nn.RNNCell, *args, **kwargs) | ||||
def forward(self, x, hx=None): | def forward(self, x, hx=None): | ||||
return super(VarRNN, self).forward(x, hx) | return super(VarRNN, self).forward(x, hx) | ||||
class VarGRU(VarRNNBase): | class VarGRU(VarRNNBase): | ||||
""" | """ | ||||
Alias: :class:`fastNLP.modules.VarGRU` :class:`fastNLP.modules.encoder.variational_rnn.VarGRU` | |||||
Alias: :class:`fastNLP.modules.VarGRU` :class:`fastNLP.modules.encoder.VarGRU` | |||||
Variational Dropout GRU. | Variational Dropout GRU. | ||||
@@ -286,10 +286,10 @@ class VarGRU(VarRNNBase): | |||||
:param hidden_dropout: dropout probability applied to each hidden state. Default: 0 | :param hidden_dropout: dropout probability applied to each hidden state. Default: 0 | ||||
:param bidirectional: if ``True``, use a bidirectional GRU. Default: ``False`` | :param bidirectional: if ``True``, use a bidirectional GRU. Default: ``False`` | ||||
""" | """ | ||||
def __init__(self, *args, **kwargs): | def __init__(self, *args, **kwargs): | ||||
super(VarGRU, self).__init__( | super(VarGRU, self).__init__( | ||||
mode="GRU", Cell=nn.GRUCell, *args, **kwargs) | mode="GRU", Cell=nn.GRUCell, *args, **kwargs) | ||||
def forward(self, x, hx=None): | def forward(self, x, hx=None): | ||||
return super(VarGRU, self).forward(x, hx) | return super(VarGRU, self).forward(x, hx) |
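Since VarRNNBase.forward also accepts a torch.nn.utils.rnn.PackedSequence (the is_packed branch above re-wraps the result), VarGRU and VarRNN can be fed packed input directly. A small sketch under that assumption; the lengths and sizes below are made up for illustration:

    import torch
    from torch.nn.utils.rnn import pack_padded_sequence
    from fastNLP.modules import VarGRU

    gru = VarGRU(input_size=8, hidden_size=16, hidden_dropout=0.2)
    x = torch.randn(5, 3, 8)                      # (seq_len, batch, input_size)
    packed = pack_padded_sequence(x, lengths=[5, 4, 2])
    output, h_n = gru(packed)                     # output is a PackedSequence; h_n: (1, 3, 16)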