Browse Source

Merge branch 'dev0.4.0' of github.com:fastnlp/fastNLP into dev

tags/v0.4.10
yh_cc 5 years ago
parent
commit
beeb7d97cd
100 changed files with 672 additions and 984 deletions
  1. +0
    -7
      docs/source/fastNLP.component.bert_tokenizer.rst
  2. +0
    -15
      docs/source/fastNLP.component.rst
  3. +2
    -2
      docs/source/fastNLP.core.batch.rst
  4. +2
    -2
      docs/source/fastNLP.core.callback.rst
  5. +2
    -2
      docs/source/fastNLP.core.const.rst
  6. +2
    -2
      docs/source/fastNLP.core.dataset.rst
  7. +2
    -2
      docs/source/fastNLP.core.field.rst
  8. +2
    -2
      docs/source/fastNLP.core.instance.rst
  9. +2
    -2
      docs/source/fastNLP.core.losses.rst
  10. +2
    -2
      docs/source/fastNLP.core.metrics.rst
  11. +2
    -2
      docs/source/fastNLP.core.optimizer.rst
  12. +4
    -3
      docs/source/fastNLP.core.rst
  13. +2
    -2
      docs/source/fastNLP.core.sampler.rst
  14. +2
    -2
      docs/source/fastNLP.core.tester.rst
  15. +2
    -2
      docs/source/fastNLP.core.trainer.rst
  16. +2
    -2
      docs/source/fastNLP.core.utils.rst
  17. +2
    -2
      docs/source/fastNLP.core.vocabulary.rst
  18. +2
    -2
      docs/source/fastNLP.io.base_loader.rst
  19. +2
    -2
      docs/source/fastNLP.io.dataset_loader.rst
  20. +2
    -2
      docs/source/fastNLP.io.embed_loader.rst
  21. +2
    -2
      docs/source/fastNLP.io.model_io.rst
  22. +4
    -3
      docs/source/fastNLP.io.rst
  23. +2
    -2
      docs/source/fastNLP.models.base_model.rst
  24. +2
    -2
      docs/source/fastNLP.models.bert.rst
  25. +2
    -2
      docs/source/fastNLP.models.biaffine_parser.rst
  26. +2
    -2
      docs/source/fastNLP.models.cnn_text_classification.rst
  27. +2
    -2
      docs/source/fastNLP.models.enas_controller.rst
  28. +2
    -2
      docs/source/fastNLP.models.enas_model.rst
  29. +2
    -2
      docs/source/fastNLP.models.enas_trainer.rst
  30. +2
    -2
      docs/source/fastNLP.models.enas_utils.rst
  31. +4
    -3
      docs/source/fastNLP.models.rst
  32. +2
    -2
      docs/source/fastNLP.models.sequence_labeling.rst
  33. +2
    -2
      docs/source/fastNLP.models.snli.rst
  34. +2
    -2
      docs/source/fastNLP.models.star_transformer.rst
  35. +2
    -2
      docs/source/fastNLP.modules.aggregator.attention.rst
  36. +2
    -2
      docs/source/fastNLP.modules.aggregator.pooling.rst
  37. +4
    -3
      docs/source/fastNLP.modules.aggregator.rst
  38. +2
    -2
      docs/source/fastNLP.modules.decoder.CRF.rst
  39. +2
    -2
      docs/source/fastNLP.modules.decoder.MLP.rst
  40. +4
    -3
      docs/source/fastNLP.modules.decoder.rst
  41. +2
    -2
      docs/source/fastNLP.modules.decoder.utils.rst
  42. +2
    -2
      docs/source/fastNLP.modules.encoder.bert.rst
  43. +2
    -2
      docs/source/fastNLP.modules.encoder.char_encoder.rst
  44. +2
    -2
      docs/source/fastNLP.modules.encoder.conv_maxpool.rst
  45. +2
    -2
      docs/source/fastNLP.modules.encoder.embedding.rst
  46. +2
    -2
      docs/source/fastNLP.modules.encoder.lstm.rst
  47. +4
    -3
      docs/source/fastNLP.modules.encoder.rst
  48. +2
    -2
      docs/source/fastNLP.modules.encoder.star_transformer.rst
  49. +2
    -2
      docs/source/fastNLP.modules.encoder.transformer.rst
  50. +2
    -2
      docs/source/fastNLP.modules.encoder.variational_rnn.rst
  51. +4
    -3
      docs/source/fastNLP.modules.rst
  52. +5
    -4
      docs/source/fastNLP.rst
  53. +15
    -21
      docs/source/index.rst
  54. +1
    -0
      docs/source/modules.rst
  55. +0
    -376
      docs/source/tutorials/fastnlp_10tmin_tutorial.rst
  56. +0
    -113
      docs/source/tutorials/fastnlp_1_minute_tutorial.rst
  57. +0
    -5
      docs/source/tutorials/fastnlp_advanced_tutorial.rst
  58. +0
    -5
      docs/source/tutorials/fastnlp_developer_guide.rst
  59. +1
    -1
      docs/source/user/quickstart.rst
  60. +0
    -3
      docs/source/user/task1.rst
  61. +0
    -3
      docs/source/user/task2.rst
  62. +371
    -0
      docs/source/user/tutorial_one.rst
  63. +5
    -0
      docs/source/user/with_fitlog.rst
  64. +5
    -7
      fastNLP/__init__.py
  65. +5
    -4
      fastNLP/core/__init__.py
  66. +19
    -11
      fastNLP/core/callback.py
  67. +6
    -1
      fastNLP/core/const.py
  68. +24
    -27
      fastNLP/core/dataset.py
  69. +1
    -2
      fastNLP/core/losses.py
  70. +2
    -3
      fastNLP/core/utils.py
  71. +6
    -7
      fastNLP/io/__init__.py
  72. +2
    -1
      fastNLP/io/base_loader.py
  73. +2
    -3
      fastNLP/io/dataset_loader.py
  74. +1
    -6
      fastNLP/io/embed_loader.py
  75. +7
    -2
      fastNLP/models/__init__.py
  76. +2
    -2
      fastNLP/models/enas_trainer.py
  77. +36
    -4
      fastNLP/modules/__init__.py
  78. +8
    -1
      fastNLP/modules/aggregator/__init__.py
  79. +2
    -1
      fastNLP/modules/aggregator/attention.py
  80. +7
    -1
      fastNLP/modules/decoder/__init__.py
  81. +6
    -4
      fastNLP/modules/encoder/__init__.py
  82. +4
    -1
      fastNLP/modules/utils.py
  83. +0
    -0
      legacy/api/README.md
  84. +0
    -0
      legacy/api/__init__.py
  85. +4
    -4
      legacy/api/api.py
  86. +0
    -0
      legacy/api/converter.py
  87. +0
    -0
      legacy/api/examples.py
  88. +0
    -0
      legacy/api/pipeline.py
  89. +6
    -9
      legacy/api/processor.py
  90. +1
    -1
      legacy/api/utils.py
  91. +0
    -0
      legacy/automl/__init__.py
  92. +0
    -0
      legacy/automl/enas_controller.py
  93. +0
    -0
      legacy/automl/enas_model.py
  94. +6
    -6
      legacy/automl/enas_trainer.py
  95. +0
    -0
      legacy/automl/enas_utils.py
  96. +0
    -0
      legacy/component/__init__.py
  97. +0
    -0
      legacy/component/bert_tokenizer.py
  98. +0
    -6
      test/api/test_pipeline.py
  99. +0
    -101
      test/api/test_processor.py
  100. +0
    -111
      test/automl/test_enas.py

+ 0
- 7
docs/source/fastNLP.component.bert_tokenizer.rst View File

@@ -1,7 +0,0 @@
fastNLP.component.bert\_tokenizer module
========================================

.. automodule:: fastNLP.component.bert_tokenizer
:members:
:undoc-members:
:show-inheritance:

+ 0
- 15
docs/source/fastNLP.component.rst View File

@@ -1,15 +0,0 @@
fastNLP.component package
=========================

.. automodule:: fastNLP.component
:members:
:undoc-members:
:show-inheritance:

Submodules
----------

.. toctree::

fastNLP.component.bert_tokenizer


+ 2
- 2
docs/source/fastNLP.core.batch.rst View File

@@ -1,5 +1,5 @@
fastNLP.core.batch module
=========================
fastNLP.core.batch
==================


.. automodule:: fastNLP.core.batch .. automodule:: fastNLP.core.batch
:members: :members:


+ 2
- 2
docs/source/fastNLP.core.callback.rst View File

@@ -1,5 +1,5 @@
fastNLP.core.callback module
============================
fastNLP.core.callback
=====================


.. automodule:: fastNLP.core.callback .. automodule:: fastNLP.core.callback
:members: :members:


+ 2
- 2
docs/source/fastNLP.core.const.rst View File

@@ -1,5 +1,5 @@
fastNLP.core.const module
=========================
fastNLP.core.const
==================


.. automodule:: fastNLP.core.const .. automodule:: fastNLP.core.const
:members: :members:


+ 2
- 2
docs/source/fastNLP.core.dataset.rst View File

@@ -1,5 +1,5 @@
fastNLP.core.dataset module
===========================
fastNLP.core.dataset
====================


.. automodule:: fastNLP.core.dataset .. automodule:: fastNLP.core.dataset
:members: :members:


+ 2
- 2
docs/source/fastNLP.core.field.rst View File

@@ -1,5 +1,5 @@
fastNLP.core.field module
=========================
fastNLP.core.field
==================


.. automodule:: fastNLP.core.field .. automodule:: fastNLP.core.field
:members: :members:


+ 2
- 2
docs/source/fastNLP.core.instance.rst View File

@@ -1,5 +1,5 @@
fastNLP.core.instance module
============================
fastNLP.core.instance
=====================


.. automodule:: fastNLP.core.instance .. automodule:: fastNLP.core.instance
:members: :members:


+ 2
- 2
docs/source/fastNLP.core.losses.rst View File

@@ -1,5 +1,5 @@
fastNLP.core.losses module
==========================
fastNLP.core.losses
===================


.. automodule:: fastNLP.core.losses .. automodule:: fastNLP.core.losses
:members: :members:


+ 2
- 2
docs/source/fastNLP.core.metrics.rst View File

@@ -1,5 +1,5 @@
fastNLP.core.metrics module
===========================
fastNLP.core.metrics
====================


.. automodule:: fastNLP.core.metrics .. automodule:: fastNLP.core.metrics
:members: :members:


+ 2
- 2
docs/source/fastNLP.core.optimizer.rst View File

@@ -1,5 +1,5 @@
fastNLP.core.optimizer module
=============================
fastNLP.core.optimizer
======================


.. automodule:: fastNLP.core.optimizer .. automodule:: fastNLP.core.optimizer
:members: :members:


+ 4
- 3
docs/source/fastNLP.core.rst View File

@@ -1,15 +1,16 @@
fastNLP.core package
====================
fastNLP.core
============


.. automodule:: fastNLP.core .. automodule:: fastNLP.core
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:


Submodules
子模块
---------- ----------


.. toctree:: .. toctree::
:titlesonly:


fastNLP.core.batch fastNLP.core.batch
fastNLP.core.callback fastNLP.core.callback


+ 2
- 2
docs/source/fastNLP.core.sampler.rst View File

@@ -1,5 +1,5 @@
fastNLP.core.sampler module
===========================
fastNLP.core.sampler
====================


.. automodule:: fastNLP.core.sampler .. automodule:: fastNLP.core.sampler
:members: :members:


+ 2
- 2
docs/source/fastNLP.core.tester.rst View File

@@ -1,5 +1,5 @@
fastNLP.core.tester module
==========================
fastNLP.core.tester
===================


.. automodule:: fastNLP.core.tester .. automodule:: fastNLP.core.tester
:members: :members:


+ 2
- 2
docs/source/fastNLP.core.trainer.rst View File

@@ -1,5 +1,5 @@
fastNLP.core.trainer module
===========================
fastNLP.core.trainer
====================


.. automodule:: fastNLP.core.trainer .. automodule:: fastNLP.core.trainer
:members: :members:


+ 2
- 2
docs/source/fastNLP.core.utils.rst View File

@@ -1,5 +1,5 @@
fastNLP.core.utils module
=========================
fastNLP.core.utils
==================


.. automodule:: fastNLP.core.utils .. automodule:: fastNLP.core.utils
:members: :members:


+ 2
- 2
docs/source/fastNLP.core.vocabulary.rst View File

@@ -1,5 +1,5 @@
fastNLP.core.vocabulary module
==============================
fastNLP.core.vocabulary
=======================


.. automodule:: fastNLP.core.vocabulary .. automodule:: fastNLP.core.vocabulary
:members: :members:


+ 2
- 2
docs/source/fastNLP.io.base_loader.rst View File

@@ -1,5 +1,5 @@
fastNLP.io.base\_loader module
==============================
fastNLP.io.base\_loader
=======================


.. automodule:: fastNLP.io.base_loader .. automodule:: fastNLP.io.base_loader
:members: :members:


+ 2
- 2
docs/source/fastNLP.io.dataset_loader.rst View File

@@ -1,5 +1,5 @@
fastNLP.io.dataset\_loader module
=================================
fastNLP.io.dataset\_loader
==========================


.. automodule:: fastNLP.io.dataset_loader .. automodule:: fastNLP.io.dataset_loader
:members: :members:


+ 2
- 2
docs/source/fastNLP.io.embed_loader.rst View File

@@ -1,5 +1,5 @@
fastNLP.io.embed\_loader module
===============================
fastNLP.io.embed\_loader
========================


.. automodule:: fastNLP.io.embed_loader .. automodule:: fastNLP.io.embed_loader
:members: :members:


+ 2
- 2
docs/source/fastNLP.io.model_io.rst View File

@@ -1,5 +1,5 @@
fastNLP.io.model\_io module
===========================
fastNLP.io.model\_io
====================


.. automodule:: fastNLP.io.model_io .. automodule:: fastNLP.io.model_io
:members: :members:


+ 4
- 3
docs/source/fastNLP.io.rst View File

@@ -1,15 +1,16 @@
fastNLP.io package
==================
fastNLP.io
==========


.. automodule:: fastNLP.io .. automodule:: fastNLP.io
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:


Submodules
子模块
---------- ----------


.. toctree:: .. toctree::
:titlesonly:


fastNLP.io.base_loader fastNLP.io.base_loader
fastNLP.io.dataset_loader fastNLP.io.dataset_loader


+ 2
- 2
docs/source/fastNLP.models.base_model.rst View File

@@ -1,5 +1,5 @@
fastNLP.models.base\_model module
=================================
fastNLP.models.base\_model
==========================


.. automodule:: fastNLP.models.base_model .. automodule:: fastNLP.models.base_model
:members: :members:


+ 2
- 2
docs/source/fastNLP.models.bert.rst View File

@@ -1,5 +1,5 @@
fastNLP.models.bert module
==========================
fastNLP.models.bert
===================


.. automodule:: fastNLP.models.bert .. automodule:: fastNLP.models.bert
:members: :members:


+ 2
- 2
docs/source/fastNLP.models.biaffine_parser.rst View File

@@ -1,5 +1,5 @@
fastNLP.models.biaffine\_parser module
======================================
fastNLP.models.biaffine\_parser
===============================


.. automodule:: fastNLP.models.biaffine_parser .. automodule:: fastNLP.models.biaffine_parser
:members: :members:


+ 2
- 2
docs/source/fastNLP.models.cnn_text_classification.rst View File

@@ -1,5 +1,5 @@
fastNLP.models.cnn\_text\_classification module
===============================================
fastNLP.models.cnn\_text\_classification
========================================


.. automodule:: fastNLP.models.cnn_text_classification .. automodule:: fastNLP.models.cnn_text_classification
:members: :members:


+ 2
- 2
docs/source/fastNLP.models.enas_controller.rst View File

@@ -1,5 +1,5 @@
fastNLP.models.enas\_controller module
======================================
fastNLP.models.enas\_controller
===============================


.. automodule:: fastNLP.models.enas_controller .. automodule:: fastNLP.models.enas_controller
:members: :members:


+ 2
- 2
docs/source/fastNLP.models.enas_model.rst View File

@@ -1,5 +1,5 @@
fastNLP.models.enas\_model module
=================================
fastNLP.models.enas\_model
==========================


.. automodule:: fastNLP.models.enas_model .. automodule:: fastNLP.models.enas_model
:members: :members:


+ 2
- 2
docs/source/fastNLP.models.enas_trainer.rst View File

@@ -1,5 +1,5 @@
fastNLP.models.enas\_trainer module
===================================
fastNLP.models.enas\_trainer
============================


.. automodule:: fastNLP.models.enas_trainer .. automodule:: fastNLP.models.enas_trainer
:members: :members:


+ 2
- 2
docs/source/fastNLP.models.enas_utils.rst View File

@@ -1,5 +1,5 @@
fastNLP.models.enas\_utils module
=================================
fastNLP.models.enas\_utils
==========================


.. automodule:: fastNLP.models.enas_utils .. automodule:: fastNLP.models.enas_utils
:members: :members:


+ 4
- 3
docs/source/fastNLP.models.rst View File

@@ -1,15 +1,16 @@
fastNLP.models package
======================
fastNLP.models
==============


.. automodule:: fastNLP.models .. automodule:: fastNLP.models
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:


Submodules
子模块
---------- ----------


.. toctree:: .. toctree::
:titlesonly:


fastNLP.models.base_model fastNLP.models.base_model
fastNLP.models.bert fastNLP.models.bert


+ 2
- 2
docs/source/fastNLP.models.sequence_labeling.rst View File

@@ -1,5 +1,5 @@
fastNLP.models.sequence\_labeling module
========================================
fastNLP.models.sequence\_labeling
=================================


.. automodule:: fastNLP.models.sequence_labeling .. automodule:: fastNLP.models.sequence_labeling
:members: :members:


+ 2
- 2
docs/source/fastNLP.models.snli.rst View File

@@ -1,5 +1,5 @@
fastNLP.models.snli module
==========================
fastNLP.models.snli
===================


.. automodule:: fastNLP.models.snli .. automodule:: fastNLP.models.snli
:members: :members:


+ 2
- 2
docs/source/fastNLP.models.star_transformer.rst View File

@@ -1,5 +1,5 @@
fastNLP.models.star\_transformer module
=======================================
fastNLP.models.star\_transformer
================================


.. automodule:: fastNLP.models.star_transformer .. automodule:: fastNLP.models.star_transformer
:members: :members:


+ 2
- 2
docs/source/fastNLP.modules.aggregator.attention.rst View File

@@ -1,5 +1,5 @@
fastNLP.modules.aggregator.attention module
===========================================
fastNLP.modules.aggregator.attention
====================================


.. automodule:: fastNLP.modules.aggregator.attention .. automodule:: fastNLP.modules.aggregator.attention
:members: :members:


+ 2
- 2
docs/source/fastNLP.modules.aggregator.pooling.rst View File

@@ -1,5 +1,5 @@
fastNLP.modules.aggregator.pooling module
=========================================
fastNLP.modules.aggregator.pooling
==================================


.. automodule:: fastNLP.modules.aggregator.pooling .. automodule:: fastNLP.modules.aggregator.pooling
:members: :members:


+ 4
- 3
docs/source/fastNLP.modules.aggregator.rst View File

@@ -1,15 +1,16 @@
fastNLP.modules.aggregator package
==================================
fastNLP.modules.aggregator
==========================


.. automodule:: fastNLP.modules.aggregator .. automodule:: fastNLP.modules.aggregator
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:


Submodules
子模块
---------- ----------


.. toctree:: .. toctree::
:titlesonly:


fastNLP.modules.aggregator.attention fastNLP.modules.aggregator.attention
fastNLP.modules.aggregator.pooling fastNLP.modules.aggregator.pooling


+ 2
- 2
docs/source/fastNLP.modules.decoder.CRF.rst View File

@@ -1,5 +1,5 @@
fastNLP.modules.decoder.CRF module
==================================
fastNLP.modules.decoder.CRF
===========================


.. automodule:: fastNLP.modules.decoder.CRF .. automodule:: fastNLP.modules.decoder.CRF
:members: :members:


+ 2
- 2
docs/source/fastNLP.modules.decoder.MLP.rst View File

@@ -1,5 +1,5 @@
fastNLP.modules.decoder.MLP module
==================================
fastNLP.modules.decoder.MLP
===========================


.. automodule:: fastNLP.modules.decoder.MLP .. automodule:: fastNLP.modules.decoder.MLP
:members: :members:


+ 4
- 3
docs/source/fastNLP.modules.decoder.rst View File

@@ -1,15 +1,16 @@
fastNLP.modules.decoder package
===============================
fastNLP.modules.decoder
=======================


.. automodule:: fastNLP.modules.decoder .. automodule:: fastNLP.modules.decoder
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:


Submodules
子模块
---------- ----------


.. toctree:: .. toctree::
:titlesonly:


fastNLP.modules.decoder.CRF fastNLP.modules.decoder.CRF
fastNLP.modules.decoder.MLP fastNLP.modules.decoder.MLP


+ 2
- 2
docs/source/fastNLP.modules.decoder.utils.rst View File

@@ -1,5 +1,5 @@
fastNLP.modules.decoder.utils module
====================================
fastNLP.modules.decoder.utils
=============================


.. automodule:: fastNLP.modules.decoder.utils .. automodule:: fastNLP.modules.decoder.utils
:members: :members:


+ 2
- 2
docs/source/fastNLP.modules.encoder.bert.rst View File

@@ -1,5 +1,5 @@
fastNLP.modules.encoder.bert module
===================================
fastNLP.modules.encoder.bert
============================


.. automodule:: fastNLP.modules.encoder.bert .. automodule:: fastNLP.modules.encoder.bert
:members: :members:


+ 2
- 2
docs/source/fastNLP.modules.encoder.char_encoder.rst View File

@@ -1,5 +1,5 @@
fastNLP.modules.encoder.char\_encoder module
============================================
fastNLP.modules.encoder.char\_encoder
=====================================


.. automodule:: fastNLP.modules.encoder.char_encoder .. automodule:: fastNLP.modules.encoder.char_encoder
:members: :members:


+ 2
- 2
docs/source/fastNLP.modules.encoder.conv_maxpool.rst View File

@@ -1,5 +1,5 @@
fastNLP.modules.encoder.conv\_maxpool module
============================================
fastNLP.modules.encoder.conv\_maxpool
=====================================


.. automodule:: fastNLP.modules.encoder.conv_maxpool .. automodule:: fastNLP.modules.encoder.conv_maxpool
:members: :members:


+ 2
- 2
docs/source/fastNLP.modules.encoder.embedding.rst View File

@@ -1,5 +1,5 @@
fastNLP.modules.encoder.embedding module
========================================
fastNLP.modules.encoder.embedding
=================================


.. automodule:: fastNLP.modules.encoder.embedding .. automodule:: fastNLP.modules.encoder.embedding
:members: :members:


+ 2
- 2
docs/source/fastNLP.modules.encoder.lstm.rst View File

@@ -1,5 +1,5 @@
fastNLP.modules.encoder.lstm module
===================================
fastNLP.modules.encoder.lstm
============================


.. automodule:: fastNLP.modules.encoder.lstm .. automodule:: fastNLP.modules.encoder.lstm
:members: :members:


+ 4
- 3
docs/source/fastNLP.modules.encoder.rst View File

@@ -1,15 +1,16 @@
fastNLP.modules.encoder package
===============================
fastNLP.modules.encoder
=======================


.. automodule:: fastNLP.modules.encoder .. automodule:: fastNLP.modules.encoder
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:


Submodules
子模块
---------- ----------


.. toctree:: .. toctree::
:titlesonly:


fastNLP.modules.encoder.bert fastNLP.modules.encoder.bert
fastNLP.modules.encoder.char_encoder fastNLP.modules.encoder.char_encoder


+ 2
- 2
docs/source/fastNLP.modules.encoder.star_transformer.rst View File

@@ -1,5 +1,5 @@
fastNLP.modules.encoder.star\_transformer module
================================================
fastNLP.modules.encoder.star\_transformer
=========================================


.. automodule:: fastNLP.modules.encoder.star_transformer .. automodule:: fastNLP.modules.encoder.star_transformer
:members: :members:


+ 2
- 2
docs/source/fastNLP.modules.encoder.transformer.rst View File

@@ -1,5 +1,5 @@
fastNLP.modules.encoder.transformer module
==========================================
fastNLP.modules.encoder.transformer
===================================


.. automodule:: fastNLP.modules.encoder.transformer .. automodule:: fastNLP.modules.encoder.transformer
:members: :members:


+ 2
- 2
docs/source/fastNLP.modules.encoder.variational_rnn.rst View File

@@ -1,5 +1,5 @@
fastNLP.modules.encoder.variational\_rnn module
===============================================
fastNLP.modules.encoder.variational\_rnn
========================================


.. automodule:: fastNLP.modules.encoder.variational_rnn .. automodule:: fastNLP.modules.encoder.variational_rnn
:members: :members:


+ 4
- 3
docs/source/fastNLP.modules.rst View File

@@ -1,15 +1,16 @@
fastNLP.modules package
=======================
fastNLP.modules
===============


.. automodule:: fastNLP.modules .. automodule:: fastNLP.modules
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:


Subpackages
子模块
----------- -----------


.. toctree:: .. toctree::
:titlesonly:


fastNLP.modules.aggregator fastNLP.modules.aggregator
fastNLP.modules.decoder fastNLP.modules.decoder

+ 5
- 4
docs/source/fastNLP.rst View File

@@ -1,4 +1,4 @@
fastNLP package
API 文档
=============== ===============


.. automodule:: fastNLP .. automodule:: fastNLP
@@ -6,14 +6,15 @@ fastNLP package
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:


Subpackages
内部模块
----------- -----------


.. toctree:: .. toctree::
:titlesonly:
:maxdepth: 3


fastNLP.component
fastNLP.core fastNLP.core
fastNLP.io fastNLP.io
fastNLP.models
fastNLP.modules fastNLP.modules
fastNLP.models



+ 15
- 21
docs/source/index.rst View File

@@ -12,19 +12,21 @@ fastNLP 是一款轻量级的 NLP 处理套件。你既可以使用它快速地
- 便捷且具有扩展性的训练器; 提供多种内置callback函数,方便实验记录、异常捕获等。 - 便捷且具有扩展性的训练器; 提供多种内置callback函数,方便实验记录、异常捕获等。




内置的模块
内置组件
------------ ------------


(TODO)
大部分用于 NLP 任务的神经网络都可以看做由编码(encoder)、聚合(aggregator)、解码(decoder)三种模块组成。


.. image:: figures/text_classification.png


主要包含了以下的三大模块:
fastNLP 在 :mod:`~fastNLP.modules` 模块中内置了三种模块的诸多组件,可以帮助用户快速搭建自己所需的网络。
三种模块的功能和常见组件如下:


+-----------------------+-----------------------+-----------------------+ +-----------------------+-----------------------+-----------------------+
| module type | functionality | example | | module type | functionality | example |
+=======================+=======================+=======================+ +=======================+=======================+=======================+
| encoder | 将输入编码为具有具 | embedding, RNN, CNN, | | encoder | 将输入编码为具有具 | embedding, RNN, CNN, |
| | 有表示能力的向量 | transformer |
| | 有表示能力的向量 | transformer |
+-----------------------+-----------------------+-----------------------+ +-----------------------+-----------------------+-----------------------+
| aggregator | 从多个向量中聚合信息 | self-attention, | | aggregator | 从多个向量中聚合信息 | self-attention, |
| | | max-pooling | | | | max-pooling |
@@ -35,32 +37,24 @@ fastNLP 是一款轻量级的 NLP 处理套件。你既可以使用它快速地
+-----------------------+-----------------------+-----------------------+ +-----------------------+-----------------------+-----------------------+




For example:

.. image:: figures/text_classification.png



各个任务上的结果
-----------------------

(TODO)

快速入门
-------------
内置模型
----------------


TODO
fastNLP 在 :mod:`~fastNLP.models` 模块中内置了如 :class:`~fastNLP.models.CNNText` 、
:class:`~fastNLP.models.SeqLabeling` 等完整的模型,以供用户直接使用。


.. todo::
这些模型的介绍如下表所示:(模型名称 + 介绍 + 任务上的结果)


用户手册 用户手册
---------------
----------------


.. toctree:: .. toctree::
:maxdepth: 1 :maxdepth: 1


安装指南 <user/installation> 安装指南 <user/installation>
快速入门 <user/quickstart> 快速入门 <user/quickstart>
详细指南 <user/tutorials>
详细指南 <user/tutorial_one>
科研向导 <user/with_fitlog> 科研向导 <user/with_fitlog>




@@ -70,7 +64,7 @@ API 文档
除了用户手册之外,你还可以通过查阅 API 文档来找到你所需要的工具。 除了用户手册之外,你还可以通过查阅 API 文档来找到你所需要的工具。


.. toctree:: .. toctree::
:maxdepth: 2
:titlesonly:
fastNLP fastNLP




+ 1
- 0
docs/source/modules.rst View File

@@ -2,6 +2,7 @@ fastNLP
======= =======


.. toctree:: .. toctree::
:titlesonly:
:maxdepth: 4 :maxdepth: 4


fastNLP fastNLP

+ 0
- 376
docs/source/tutorials/fastnlp_10tmin_tutorial.rst View File

@@ -1,376 +0,0 @@
fastNLP 10分钟上手教程
===============

教程原文见 https://github.com/fastnlp/fastNLP/blob/master/tutorials/fastnlp_10min_tutorial.ipynb

fastNLP提供方便的数据预处理,训练和测试模型的功能

DataSet & Instance
------------------

fastNLP用DataSet和Instance保存和处理数据。每个DataSet表示一个数据集,每个Instance表示一个数据样本。一个DataSet存有多个Instance,每个Instance可以自定义存哪些内容。

有一些read\_\*方法,可以轻松从文件读取数据,存成DataSet。

.. code:: ipython3

from fastNLP import DataSet
from fastNLP import Instance
# 从csv读取数据到DataSet
win_path = "C:\\Users\zyfeng\Desktop\FudanNLP\\fastNLP\\test\\data_for_tests\\tutorial_sample_dataset.csv"
dataset = DataSet.read_csv(win_path, headers=('raw_sentence', 'label'), sep='\t')
print(dataset[0])


.. parsed-literal::

{'raw_sentence': A series of escapades demonstrating the adage that what is good for the goose is also good for the gander , some of which occasionally amuses but none of which amounts to much of a story .,
'label': 1}

.. code:: ipython3

# DataSet.append(Instance)加入新数据
dataset.append(Instance(raw_sentence='fake data', label='0'))
dataset[-1]




.. parsed-literal::

{'raw_sentence': fake data,
'label': 0}



.. code:: ipython3

# DataSet.apply(func, new_field_name)对数据预处理
# 将所有数字转为小写
dataset.apply(lambda x: x['raw_sentence'].lower(), new_field_name='raw_sentence')
# label转int
dataset.apply(lambda x: int(x['label']), new_field_name='label_seq', is_target=True)
# 使用空格分割句子
dataset.drop(lambda x: len(x['raw_sentence'].split()) == 0)
def split_sent(ins):
return ins['raw_sentence'].split()
dataset.apply(split_sent, new_field_name='words', is_input=True)

.. code:: ipython3

# DataSet.drop(func)筛除数据
# 删除低于某个长度的词语
dataset.drop(lambda x: len(x['words']) <= 3)

.. code:: ipython3

# 分出测试集、训练集
test_data, train_data = dataset.split(0.3)
print("Train size: ", len(test_data))
print("Test size: ", len(train_data))


.. parsed-literal::

Train size: 54
Test size:

Vocabulary
----------

fastNLP中的Vocabulary轻松构建词表,将词转成数字

.. code:: ipython3

from fastNLP import Vocabulary
# 构建词表, Vocabulary.add(word)
vocab = Vocabulary(min_freq=2)
train_data.apply(lambda x: [vocab.add(word) for word in x['words']])
vocab.build_vocab()
# index句子, Vocabulary.to_index(word)
train_data.apply(lambda x: [vocab.to_index(word) for word in x['words']], new_field_name='word_seq', is_input=True)
test_data.apply(lambda x: [vocab.to_index(word) for word in x['words']], new_field_name='word_seq', is_input=True)
print(test_data[0])


.. parsed-literal::

{'raw_sentence': the plot is romantic comedy boilerplate from start to finish .,
'label': 2,
'label_seq': 2,
'words': ['the', 'plot', 'is', 'romantic', 'comedy', 'boilerplate', 'from', 'start', 'to', 'finish', '.'],
'word_seq': [2, 13, 9, 24, 25, 26, 15, 27, 11, 28, 3]}

.. code:: ipython3

# 假设你们需要做强化学习或者gan之类的项目,也许你们可以使用这里的dataset
from fastNLP.core.batch import Batch
from fastNLP.core.sampler import RandomSampler
batch_iterator = Batch(dataset=train_data, batch_size=2, sampler=RandomSampler())
for batch_x, batch_y in batch_iterator:
print("batch_x has: ", batch_x)
print("batch_y has: ", batch_y)
break


.. parsed-literal::

batch_x has: {'words': array([list(['this', 'kind', 'of', 'hands-on', 'storytelling', 'is', 'ultimately', 'what', 'makes', 'shanghai', 'ghetto', 'move', 'beyond', 'a', 'good', ',', 'dry', ',', 'reliable', 'textbook', 'and', 'what', 'allows', 'it', 'to', 'rank', 'with', 'its', 'worthy', 'predecessors', '.']),
list(['the', 'entire', 'movie', 'is', 'filled', 'with', 'deja', 'vu', 'moments', '.'])],
dtype=object), 'word_seq': tensor([[ 19, 184, 6, 1, 481, 9, 206, 50, 91, 1210, 1609, 1330,
495, 5, 63, 4, 1269, 4, 1, 1184, 7, 50, 1050, 10,
8, 1611, 16, 21, 1039, 1, 2],
[ 3, 711, 22, 9, 1282, 16, 2482, 2483, 200, 2, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0]])}
batch_y has: {'label_seq': tensor([3, 2])}

Model
-----

.. code:: ipython3

# 定义一个简单的Pytorch模型
from fastNLP.models import CNNText
model = CNNText(embed_num=len(vocab), embed_dim=50, num_classes=5, padding=2, dropout=0.1)
model




.. parsed-literal::

CNNText(
(embed): Embedding(
(embed): Embedding(77, 50, padding_idx=0)
(dropout): Dropout(p=0.0)
)
(conv_pool): ConvMaxpool(
(convs): ModuleList(
(0): Conv1d(50, 3, kernel_size=(3,), stride=(1,), padding=(2,))
(1): Conv1d(50, 4, kernel_size=(4,), stride=(1,), padding=(2,))
(2): Conv1d(50, 5, kernel_size=(5,), stride=(1,), padding=(2,))
)
)
(dropout): Dropout(p=0.1)
(fc): Linear(
(linear): Linear(in_features=12, out_features=5, bias=True)
)
)



Trainer & Tester
----------------

使用fastNLP的Trainer训练模型

.. code:: ipython3

from fastNLP import Trainer
from copy import deepcopy
from fastNLP import CrossEntropyLoss
from fastNLP import AccuracyMetric

.. code:: ipython3

# 进行overfitting测试
copy_model = deepcopy(model)
overfit_trainer = Trainer(model=copy_model,
train_data=test_data,
dev_data=test_data,
loss=CrossEntropyLoss(pred="output", target="label_seq"),
metrics=AccuracyMetric(),
n_epochs=10,
save_path=None)
overfit_trainer.train()


.. parsed-literal::

training epochs started 2018-12-07 14:07:20



.. parsed-literal::

HBox(children=(IntProgress(value=0, layout=Layout(flex='2'), max=20), HTML(value='')), layout=Layout(display='…



.. parsed-literal::

Epoch 1/10. Step:2/20. AccuracyMetric: acc=0.037037
Epoch 2/10. Step:4/20. AccuracyMetric: acc=0.296296
Epoch 3/10. Step:6/20. AccuracyMetric: acc=0.333333
Epoch 4/10. Step:8/20. AccuracyMetric: acc=0.555556
Epoch 5/10. Step:10/20. AccuracyMetric: acc=0.611111
Epoch 6/10. Step:12/20. AccuracyMetric: acc=0.481481
Epoch 7/10. Step:14/20. AccuracyMetric: acc=0.62963
Epoch 8/10. Step:16/20. AccuracyMetric: acc=0.685185
Epoch 9/10. Step:18/20. AccuracyMetric: acc=0.722222
Epoch 10/10. Step:20/20. AccuracyMetric: acc=0.777778

.. code:: ipython3

# 实例化Trainer,传入模型和数据,进行训练
trainer = Trainer(model=model,
train_data=train_data,
dev_data=test_data,
loss=CrossEntropyLoss(pred="output", target="label_seq"),
metrics=AccuracyMetric(),
n_epochs=5)
trainer.train()
print('Train finished!')


.. parsed-literal::

training epochs started 2018-12-07 14:08:10



.. parsed-literal::

HBox(children=(IntProgress(value=0, layout=Layout(flex='2'), max=5), HTML(value='')), layout=Layout(display='i…



.. parsed-literal::

Epoch 1/5. Step:1/5. AccuracyMetric: acc=0.037037
Epoch 2/5. Step:2/5. AccuracyMetric: acc=0.037037
Epoch 3/5. Step:3/5. AccuracyMetric: acc=0.037037
Epoch 4/5. Step:4/5. AccuracyMetric: acc=0.185185
Epoch 5/5. Step:5/5. AccuracyMetric: acc=0.240741
Train finished!

.. code:: ipython3

from fastNLP import Tester
tester = Tester(data=test_data, model=model, metrics=AccuracyMetric())
acc = tester.test()


.. parsed-literal::

[tester]
AccuracyMetric: acc=0.240741

In summary
----------

fastNLP Trainer的伪代码逻辑
---------------------------

1. 准备DataSet,假设DataSet中共有如下的fields
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

::

['raw_sentence', 'word_seq1', 'word_seq2', 'raw_label','label']
通过
DataSet.set_input('word_seq1', 'word_seq2', flag=True)将'word_seq1', 'word_seq2'设置为input
通过
DataSet.set_target('label', flag=True)将'label'设置为target

2. 初始化模型
~~~~~~~~~~~~~

::

class Model(nn.Module):
def __init__(self):
xxx
def forward(self, word_seq1, word_seq2):
# (1) 这里使用的形参名必须和DataSet中的input field的名称对应。因为我们是通过形参名, 进行赋值的
# (2) input field的数量可以多于这里的形参数量。但是不能少于。
xxxx
# 输出必须是一个dict

3. Trainer的训练过程
~~~~~~~~~~~~~~~~~~~~

::

(1) 从DataSet中按照batch_size取出一个batch,调用Model.forward
(2) 将 Model.forward的结果 与 标记为target的field 传入Losser当中。
由于每个人写的Model.forward的output的dict可能key并不一样,比如有人是{'pred':xxx}, {'output': xxx};
另外每个人将target可能也会设置为不同的名称, 比如有人是label, 有人设置为target;
为了解决以上的问题,我们的loss提供映射机制
比如CrossEntropyLosser需要的输入是(prediction, target)。但是forward的output是{'output': xxx}; 'label'是target
那么初始化losser的时候写为CrossEntropyLosser(prediction='output', target='label')即可
(3) 对于Metric是同理的
Metric计算也是从 forward的结果中取值 与 设置target的field中取值。 也是可以通过映射找到对应的值

一些问题.
---------

1. DataSet中为什么需要设置input和target
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

::

只有被设置为input或者target的数据才会在train的过程中被取出来
(1.1) 我们只会在设置为input的field中寻找传递给Model.forward的参数。
(1.2) 我们在传递值给losser或者metric的时候会使用来自:
(a)Model.forward的output
(b)被设置为target的field

2. 我们是通过forward中的形参名将DataSet中的field赋值给对应的参数
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

::

(1.1) 构建模型过程中,
例如:
DataSet中x,seq_lens是input,那么forward就应该是
def forward(self, x, seq_lens):
pass
我们是通过形参名称进行匹配的field的

1. 加载数据到DataSet
~~~~~~~~~~~~~~~~~~~~

2. 使用apply操作对DataSet进行预处理
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

::

(2.1) 处理过程中将某些field设置为input,某些field设置为target

3. 构建模型
~~~~~~~~~~~

::

(3.1) 构建模型过程中,需要注意forward函数的形参名需要和DataSet中设置为input的field名称是一致的。
例如:
DataSet中x,seq_lens是input,那么forward就应该是
def forward(self, x, seq_lens):
pass
我们是通过形参名称进行匹配的field的
(3.2) 模型的forward的output需要是dict类型的。
建议将输出设置为{"pred": xx}.

+ 0
- 113
docs/source/tutorials/fastnlp_1_minute_tutorial.rst View File

@@ -1,113 +0,0 @@

FastNLP 1分钟上手教程
=====================

教程原文见 https://github.com/fastnlp/fastNLP/blob/master/tutorials/fastnlp_1min_tutorial.ipynb

step 1
------

读取数据集

.. code:: ipython3

from fastNLP import DataSet
# linux_path = "../test/data_for_tests/tutorial_sample_dataset.csv"
win_path = "C:\\Users\zyfeng\Desktop\FudanNLP\\fastNLP\\test\\data_for_tests\\tutorial_sample_dataset.csv"
ds = DataSet.read_csv(win_path, headers=('raw_sentence', 'label'), sep='\t')

step 2
------

数据预处理 1. 类型转换 2. 切分验证集 3. 构建词典

.. code:: ipython3

# 将所有字母转为小写
ds.apply(lambda x: x['raw_sentence'].lower(), new_field_name='raw_sentence')
# label转int
ds.apply(lambda x: int(x['label']), new_field_name='label_seq', is_target=True)
def split_sent(ins):
return ins['raw_sentence'].split()
ds.apply(split_sent, new_field_name='words', is_input=True)

.. code:: ipython3

# 分割训练集/验证集
train_data, dev_data = ds.split(0.3)
print("Train size: ", len(train_data))
print("Test size: ", len(dev_data))


.. parsed-literal::

Train size: 54
Test size: 23

.. code:: ipython3

from fastNLP import Vocabulary
vocab = Vocabulary(min_freq=2)
train_data.apply(lambda x: [vocab.add(word) for word in x['words']])
# index句子, Vocabulary.to_index(word)
train_data.apply(lambda x: [vocab.to_index(word) for word in x['words']], new_field_name='word_seq', is_input=True)
dev_data.apply(lambda x: [vocab.to_index(word) for word in x['words']], new_field_name='word_seq', is_input=True)

step 3
------

定义模型

.. code:: ipython3

from fastNLP.models import CNNText
model = CNNText(embed_num=len(vocab), embed_dim=50, num_classes=5, padding=2, dropout=0.1)

step 4
------

开始训练

.. code:: ipython3

from fastNLP import Trainer, CrossEntropyLoss, AccuracyMetric
trainer = Trainer(model=model,
train_data=train_data,
dev_data=dev_data,
loss=CrossEntropyLoss(),
metrics=AccuracyMetric()
)
trainer.train()
print('Train finished!')


.. parsed-literal::

training epochs started 2018-12-07 14:03:41



.. parsed-literal::

HBox(children=(IntProgress(value=0, layout=Layout(flex='2'), max=6), HTML(value='')), layout=Layout(display='i…



.. parsed-literal::

Epoch 1/3. Step:2/6. AccuracyMetric: acc=0.26087
Epoch 2/3. Step:4/6. AccuracyMetric: acc=0.347826
Epoch 3/3. Step:6/6. AccuracyMetric: acc=0.608696
Train finished!

本教程结束。更多操作请参考进阶教程。
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

+ 0
- 5
docs/source/tutorials/fastnlp_advanced_tutorial.rst View File

@@ -1,5 +0,0 @@
fastNLP 进阶教程
===============

教程原文见 https://github.com/fastnlp/fastNLP/blob/master/tutorials/fastnlp_advanced_tutorial/advance_tutorial.ipynb


+ 0
- 5
docs/source/tutorials/fastnlp_developer_guide.rst View File

@@ -1,5 +0,0 @@
fastNLP 开发者指南
===============

原文见 https://github.com/fastnlp/fastNLP/blob/master/tutorials/tutorial_for_developer.md


+ 1
- 1
docs/source/user/quickstart.rst View File

@@ -121,4 +121,4 @@
In Epoch:6/Step:12, got best dev performance:AccuracyMetric: acc=0.8 In Epoch:6/Step:12, got best dev performance:AccuracyMetric: acc=0.8
Reloaded the best model. Reloaded the best model.


这份教程只是简单地介绍了使用 fastNLP 工作的流程,具体的细节分析见 :doc:`/user/tutorials`
这份教程只是简单地介绍了使用 fastNLP 工作的流程,具体的细节分析见 :doc:`/user/tutorial_one`

+ 0
- 3
docs/source/user/task1.rst View File

@@ -1,3 +0,0 @@
=====================
用 fastNLP 分类
=====================

+ 0
- 3
docs/source/user/task2.rst View File

@@ -1,3 +0,0 @@
=====================
用 fastNLP 分词
=====================

+ 371
- 0
docs/source/user/tutorial_one.rst View File

@@ -0,0 +1,371 @@
===============
详细指南
===============

我们使用和 :doc:`/user/quickstart` 中一样的任务来进行详细的介绍。给出一段文字,预测它的标签是0~4中的哪一个
(数据来源 `kaggle <https://www.kaggle.com/c/sentiment-analysis-on-movie-reviews>`_ )。

--------------
数据处理
--------------

数据读入
我们可以使用 fastNLP :mod:`fastNLP.io` 模块中的 :class:`~fastNLP.io.CSVLoader` 类,轻松地从 csv 文件读取我们的数据。
这里的 dataset 是 fastNLP 中 :class:`~fastNLP.DataSet` 类的对象

.. code-block:: python

from fastNLP.io import CSVLoader

loader = CSVLoader(headers=('raw_sentence', 'label'), sep='\t')
dataset = loader.load("./sample_data/tutorial_sample_dataset.csv")

除了读取数据外,fastNLP 还提供了读取其它文件类型的 Loader 类、读取 Embedding的 Loader 等。详见 :doc:`/fastNLP.io` 。

Instance 和 DataSet
fastNLP 中的 :class:`~fastNLP.DataSet` 类对象类似于二维表格,它的每一列是一个 :mod:`~fastNLP.core.field`
每一行是一个 :mod:`~fastNLP.core.instance` 。我们可以手动向数据集中添加 :class:`~fastNLP.Instance` 类的对象

.. code-block:: python

from fastNLP import Instance

dataset.append(Instance(raw_sentence='fake data', label='0'))

此时的 ``dataset[-1]`` 的值如下,可以看到,数据集中的每个数据包含 ``raw_sentence`` 和 ``label`` 两个
:mod:`~fastNLP.core.field` ,他们的类型都是 ``str`` ::

{'raw_sentence': fake data type=str, 'label': 0 type=str}

field 的修改
我们使用 :class:`~fastNLP.DataSet` 类的 :meth:`~fastNLP.DataSet.apply` 方法将 ``raw_sentence`` 中字母变成小写,并将句子分词。
同时也将 ``label`` :mod:`~fastNLP.core.field` 转化为整数并改名为 ``target``

.. code-block:: python

dataset.apply(lambda x: x['raw_sentence'].lower(), new_field_name='sentence')
dataset.apply_field(lambda x: x.split(), field_name='sentence', new_field_name='words')
dataset.apply(lambda x: int(x['label']), new_field_name='target')

``words`` 和 ``target`` 已经足够用于 :class:`~fastNLP.models.CNNText` 的训练了,但我们从其文档
:class:`~fastNLP.models.CNNText` 中看到,在 :meth:`~fastNLP.models.CNNText.forward` 的时候,还可以传入可选参数 ``seq_len`` 。
所以,我们再使用 :meth:`~fastNLP.DataSet.apply_field` 方法增加一个名为 ``seq_len`` 的 :mod:`~fastNLP.core.field` 。

.. code-block:: python

dataset.apply_field(lambda x: len(x), field_name='words', new_field_name='seq_len')

观察可知: :meth:`~fastNLP.DataSet.apply_field` 与 :meth:`~fastNLP.DataSet.apply` 类似,
但所传入的 `lambda` 函数是针对一个 :class:`~fastNLP.Instance` 中的一个 :mod:`~fastNLP.core.field` 的;
而 :meth:`~fastNLP.DataSet.apply` 所传入的 `lambda` 函数是针对整个 :class:`~fastNLP.Instance` 的。

.. note::
`lambda` 函数即匿名函数,是 Python 的重要特性。 ``lambda x: len(x)`` 和下面的这个函数的作用相同::

def func_lambda(x):
return len(x)

你也可以编写复杂的函数做为 :meth:`~fastNLP.DataSet.apply_field` 与 :meth:`~fastNLP.DataSet.apply` 的参数

Vocabulary 的使用
我们再用 :class:`~fastNLP.Vocabulary` 类来统计数据中出现的单词,并使用 :meth:`~fastNLP.Vocabulary.index_dataset`
将单词序列转化为训练可用的数字序列。

.. code-block:: python

from fastNLP import Vocabulary

vocab = Vocabulary(min_freq=2).from_dataset(dataset, field_name='words')
vocab.index_dataset(dataset, field_name='words',new_field_name='words')

数据集分割
除了修改 :mod:`~fastNLP.core.field` 之外,我们还可以对 :class:`~fastNLP.DataSet` 进行分割,以供训练、开发和测试使用。
下面这段代码展示了 :meth:`~fastNLP.DataSet.split` 的使用方法(但实际应该放在后面两段改名和设置输入的代码之后)

.. code-block:: python

train_dev_data, test_data = dataset.split(0.1)
train_data, dev_data = train_dev_data.split(0.1)
len(train_data), len(dev_data), len(test_data)

---------------------
使用内置模型训练
---------------------

内置模型的输入输出命名
fastNLP内置了一些完整的神经网络模型,详见 :doc:`/fastNLP.models` , 我们使用其中的 :class:`~fastNLP.models.CNNText` 模型进行训练。
为了使用内置的 :class:`~fastNLP.models.CNNText`,我们必须修改 :class:`~fastNLP.DataSet` 中 :mod:`~fastNLP.core.field` 的名称。
在这个例子中模型输入 (forward方法的参数) 为 ``words`` 和 ``seq_len`` ; 预测输出为 ``pred`` ;标准答案为 ``target`` 。
具体的命名规范可以参考 :doc:`/fastNLP.core.const` 。

如果不想查看文档,您也可以使用 :class:`~fastNLP.Const` 类进行命名。下面的代码展示了给 :class:`~fastNLP.DataSet` 中
:mod:`~fastNLP.core.field` 改名的 :meth:`~fastNLP.DataSet.rename_field` 方法,以及 :class:`~fastNLP.Const` 类的使用方法。

.. code-block:: python

from fastNLP import Const

dataset.rename_field('words', Const.INPUT)
dataset.rename_field('seq_len', Const.INPUT_LEN)
dataset.rename_field('target', Const.TARGET)

在给 :class:`~fastNLP.DataSet` 中 :mod:`~fastNLP.core.field` 改名后,我们还需要设置训练所需的输入和目标,这里使用的是
:meth:`~fastNLP.DataSet.set_input` 和 :meth:`~fastNLP.DataSet.set_target` 两个函数。

.. code-block:: python

dataset.set_input(Const.INPUT, Const.INPUT_LEN)
dataset.set_target(Const.TARGET)

快速训练
现在我们可以导入 fastNLP 内置的文本分类模型 :class:`~fastNLP.models.CNNText` ,并使用 :class:`~fastNLP.Trainer` 进行训练了
(其中 ``loss`` 和 ``metrics`` 的定义,我们将在后续两段代码中给出)。

.. code-block:: python

from fastNLP.models import CNNText
from fastNLP import Trainer

model_cnn = CNNText((len(vocab),50), num_classes=5, padding=2, dropout=0.1)

trainer = Trainer(model=model_cnn, train_data=train_data, dev_data=dev_data,
loss=loss, metrics=metrics)
trainer.train()

训练过程的输出如下::

input fields after batch(if batch size is 2):
words: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2, 26])
target fields after batch(if batch size is 2):
target: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2])

training epochs started 2019-05-09-10-59-39
Evaluation at Epoch 1/10. Step:2/20. AccuracyMetric: acc=0.333333

Evaluation at Epoch 2/10. Step:4/20. AccuracyMetric: acc=0.533333

Evaluation at Epoch 3/10. Step:6/20. AccuracyMetric: acc=0.533333

Evaluation at Epoch 4/10. Step:8/20. AccuracyMetric: acc=0.533333

Evaluation at Epoch 5/10. Step:10/20. AccuracyMetric: acc=0.6

Evaluation at Epoch 6/10. Step:12/20. AccuracyMetric: acc=0.8

Evaluation at Epoch 7/10. Step:14/20. AccuracyMetric: acc=0.8

Evaluation at Epoch 8/10. Step:16/20. AccuracyMetric: acc=0.733333

Evaluation at Epoch 9/10. Step:18/20. AccuracyMetric: acc=0.733333

Evaluation at Epoch 10/10. Step:20/20. AccuracyMetric: acc=0.733333


In Epoch:6/Step:12, got best dev performance:AccuracyMetric: acc=0.8
Reloaded the best model.

损失函数
训练模型需要提供一个损失函数, 下面提供了一个在分类问题中常用的交叉熵损失。注意它的 **初始化参数** 。
``pred`` 参数对应的是模型的 forward 方法返回的 dict 中的一个 key 的名字。
``target`` 参数对应的是 :class:`~fastNLP.DataSet` 中作为标签的 :mod:`~fastNLP.core.field` 的名字。
这里我们用 :class:`~fastNLP.Const` 来辅助命名,如果你自己编写模型中 forward 方法的返回值或
数据集中 :mod:`~fastNLP.core.field` 的名字与本例不同, 你可以把 ``pred`` 参数和 ``target`` 参数设定符合自己代码的值。

.. code-block:: python

from fastNLP import CrossEntropyLoss

# loss = CrossEntropyLoss() 在本例中与下面这行代码等价
loss = CrossEntropyLoss(pred=Const.OUTPUT, target=Const.TARGET)

评价指标
训练模型需要提供一个评价指标。这里使用准确率做为评价指标。参数的 `命名规则` 跟上面类似。
``pred`` 参数对应的是模型的 forward 方法返回的 dict 中的一个 key 的名字。
``target`` 参数对应的是 :class:`~fastNLP.DataSet` 中作为标签的 :mod:`~fastNLP.core.field` 的名字。

.. code-block:: python

from fastNLP import AccuracyMetric

# metrics=AccuracyMetric() 在本例中与下面这行代码等价
metrics=AccuracyMetric(pred=Const.OUTPUT, target=Const.TARGET)

快速测试
与 :class:`~fastNLP.Trainer` 对应,fastNLP 也提供了 :class:`~fastNLP.Tester` 用于快速测试,用法如下

.. code-block:: python

from fastNLP import Tester

tester = Tester(test_data, model_cnn, metrics=AccuracyMetric())
tester.test()

---------------------
编写自己的模型
---------------------

因为 fastNLP 是基于 `PyTorch <https://pytorch.org/>`_ 开发的框架,所以我们可以基于 PyTorch 模型编写自己的神经网络模型。
与标准的 PyTorch 模型不同,fastNLP 模型中 forward 方法返回的是一个字典,字典中至少需要包含 "pred" 这个字段。
而 forward 方法的参数名称必须与 :class:`~fastNLP.DataSet` 中用 :meth:`~fastNLP.DataSet.set_input` 设定的名称一致。
模型定义的代码如下:

.. code-block:: python

import torch
import torch.nn as nn

class LSTMText(nn.Module):
def __init__(self, vocab_size, embedding_dim, output_dim, hidden_dim=64, num_layers=2, dropout=0.5):
super().__init__()

self.embedding = nn.Embedding(vocab_size, embedding_dim)
self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=num_layers, bidirectional=True, dropout=dropout)
self.fc = nn.Linear(hidden_dim * 2, output_dim)
self.dropout = nn.Dropout(dropout)

def forward(self, words):
# (input) words : (batch_size, seq_len)
words = words.permute(1,0)
# words : (seq_len, batch_size)

embedded = self.dropout(self.embedding(words))
# embedded : (seq_len, batch_size, embedding_dim)
output, (hidden, cell) = self.lstm(embedded)
# output: (seq_len, batch_size, hidden_dim * 2)
# hidden: (num_layers * 2, batch_size, hidden_dim)
# cell: (num_layers * 2, batch_size, hidden_dim)

hidden = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
hidden = self.dropout(hidden)
# hidden: (batch_size, hidden_dim * 2)

pred = self.fc(hidden.squeeze(0))
# result: (batch_size, output_dim)
return {"pred":pred}

模型的使用方法与内置模型 :class:`~fastNLP.models.CNNText` 一致

.. code-block:: python

model_lstm = LSTMText(len(vocab),50,5)

trainer = Trainer(model=model_lstm, train_data=train_data, dev_data=dev_data,
loss=loss, metrics=metrics)
trainer.train()

tester = Tester(test_data, model_lstm, metrics=AccuracyMetric())
tester.test()

.. todo::
使用 :doc:`/fastNLP.modules` 编写模型

--------------------------
自己编写训练过程
--------------------------

如果你想用类似 PyTorch 的使用方法,自己编写训练过程,你可以参考下面这段代码。其中使用了 fastNLP 提供的 :class:`~fastNLP.Batch`
来获得小批量训练的小批量数据,使用 :class:`~fastNLP.BucketSampler` 做为 :class:`~fastNLP.Batch` 的参数来选择采样的方式。
这段代码中使用了 PyTorch 的 `torch.optim.Adam` 优化器 和 `torch.nn.CrossEntropyLoss` 损失函数,并自己计算了正确率

.. code-block:: python

from fastNLP import BucketSampler
from fastNLP import Batch
import torch
import time

model = CNNText((len(vocab),50), num_classes=5, padding=2, dropout=0.1)

def train(epoch, data):
optim = torch.optim.Adam(model.parameters(), lr=0.001)
lossfunc = torch.nn.CrossEntropyLoss()
batch_size = 32

train_sampler = BucketSampler(batch_size=batch_size, seq_len_field_name='seq_len')
train_batch = Batch(batch_size=batch_size, dataset=data, sampler=train_sampler)

start_time = time.time()
for i in range(epoch):
loss_list = []
for batch_x, batch_y in train_batch:
optim.zero_grad()
output = model(batch_x['words'])
loss = lossfunc(output['pred'], batch_y['target'])
loss.backward()
optim.step()
loss_list.append(loss.item())
print('Epoch {:d} Avg Loss: {:.2f}'.format(i, sum(loss_list) / len(loss_list)),end=" ")
print('{:d}ms'.format(round((time.time()-start_time)*1000)))
loss_list.clear()

train(10, train_data)

tester = Tester(test_data, model, metrics=AccuracyMetric())
tester.test()

这段代码的输出如下::

Epoch 0 Avg Loss: 2.76 17ms
Epoch 1 Avg Loss: 2.55 29ms
Epoch 2 Avg Loss: 2.37 41ms
Epoch 3 Avg Loss: 2.30 53ms
Epoch 4 Avg Loss: 2.12 65ms
Epoch 5 Avg Loss: 2.16 76ms
Epoch 6 Avg Loss: 1.88 88ms
Epoch 7 Avg Loss: 1.84 99ms
Epoch 8 Avg Loss: 1.71 111ms
Epoch 9 Avg Loss: 1.62 122ms
[tester]
AccuracyMetric: acc=0.142857

----------------------------------
使用 Callback 增强 Trainer
----------------------------------

如果你不想自己实现繁琐的训练过程,只希望在训练过程中实现一些自己的功能(比如:输出从训练开始到当前 batch 结束的总时间),
你可以使用 fastNLP 提供的 :class:`~fastNLP.Callback` 类。下面的例子中,我们继承 :class:`~fastNLP.Callback` 类实现了这个功能。

.. code-block:: python

from fastNLP import Callback

start_time = time.time()

class MyCallback(Callback):
def on_epoch_end(self):
print('Sum Time: {:d}ms\n\n'.format(round((time.time()-start_time)*1000)))


model = CNNText((len(vocab),50), num_classes=5, padding=2, dropout=0.1)
trainer = Trainer(model=model, train_data=train_data, dev_data=dev_data,
loss=CrossEntropyLoss(), metrics=AccuracyMetric(), callbacks=[MyCallback()])
trainer.train()

训练输出如下::

input fields after batch(if batch size is 2):
words: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2, 16])
seq_len: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2])
target fields after batch(if batch size is 2):
target: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2])

training epochs started 2019-05-12-21-38-40
Evaluation at Epoch 1/10. Step:2/20. AccuracyMetric: acc=0.285714

Sum Time: 51ms


…………………………


Evaluation at Epoch 10/10. Step:20/20. AccuracyMetric: acc=0.857143

Sum Time: 212ms



In Epoch:10/Step:20, got best dev performance:AccuracyMetric: acc=0.857143
Reloaded the best model.

这个例子只是介绍了 :class:`~fastNLP.Callback` 类的使用方法。实际应用(比如:负采样、Learning Rate Decay、Early Stop 等)中
很多功能已经被 fastNLP 实现了。你可以直接 import 它们使用,详细请查看文档 :doc:`/fastNLP.core.callback` 。

+ 5
- 0
docs/source/user/with_fitlog.rst View File

@@ -0,0 +1,5 @@
=================
科研向导
=================

本文介绍使用 fastNLP 和 fitlog 进行科学研究的方法

+ 5
- 7
fastNLP/__init__.py View File

@@ -1,12 +1,11 @@
""" """
fastNLP 由 :mod:`~fastNLP.core` 、 :mod:`~fastNLP.io` 、:mod:`~fastNLP.modules`、:mod:`~fastNLP.models` fastNLP 由 :mod:`~fastNLP.core` 、 :mod:`~fastNLP.io` 、:mod:`~fastNLP.modules`、:mod:`~fastNLP.models`
和 :mod:`~fastNLP.component` 等子模块组成。
等子模块组成,你可以点进去查看每个模块的文档


- :mod:`~fastNLP.core` fastNLP 的核心模块,包括 DataSet、 Trainer、 Tester 等组件
- :mod:`~fastNLP.io` fastNLP 的输入输出模块,实现了数据集的读取,模型的存取等功能
- :mod:`~fastNLP.modules` TODO 如何介绍
- :mod:`~fastNLP.models` 使用 fastNLP 实现的一些常见模型,具体参见 :doc:`fastNLP.models`
- :mod:`~fastNLP.component` TODO
- :mod:`~fastNLP.core` 是fastNLP 的核心模块,包括 DataSet、 Trainer、 Tester 等组件。详见文档 :doc:`/fastNLP.core`
- :mod:`~fastNLP.io` 是实现输入输出的模块,包括了数据集的读取,模型的存取等功能。详见文档 :doc:`/fastNLP.io`
- :mod:`~fastNLP.modules` 包含了用于搭建神经网络模型的诸多组件,可以帮助用户快速搭建自己所需的网络。详见文档 :doc:`/fastNLP.modules`
- :mod:`~fastNLP.models` 包含了一些使用 fastNLP 实现的完整网络模型,包括CNNText、SeqLabeling等常见模型。详见文档 :doc:`/fastNLP.models`


fastNLP 中最常用的组件可以直接从 fastNLP 包中 import ,他们的文档如下: fastNLP 中最常用的组件可以直接从 fastNLP 包中 import ,他们的文档如下:
""" """
@@ -33,7 +32,6 @@ __all__ = [
"EngChar2DPadder", "EngChar2DPadder",
"AccuracyMetric", "AccuracyMetric",
"BMESF1PreRecMetric",
"SpanFPreRecMetric", "SpanFPreRecMetric",
"SQuADMetric", "SQuADMetric",


+ 5
- 4
fastNLP/core/__init__.py View File

@@ -1,5 +1,5 @@
""" """
core 模块里实现了 fastNLP 的核心框架,常用的组件都可以从 fastNLP 包中直接 import。当然你也同样可以从 core 模块的子模块中 import,
core 模块里实现了 fastNLP 的核心框架,常用的功能都可以从 fastNLP 包中直接 import。当然你也同样可以从 core 模块的子模块中 import,
例如 Batch 组件有两种 import 的方式:: 例如 Batch 组件有两种 import 的方式::
# 直接从 fastNLP 中 import # 直接从 fastNLP 中 import
@@ -8,10 +8,11 @@ core 模块里实现了 fastNLP 的核心框架,常用的组件都可以从 fa
# 从 core 模块的子模块 batch 中 import # 从 core 模块的子模块 batch 中 import
from fastNLP.core.batch import Batch from fastNLP.core.batch import Batch


对于常用的功能,你只需要在 :doc:`fastNLP` 中查看即可。如果想了解各个子模块的分工,您可以阅读以下文档:

TODO 向导
对于常用的功能,你只需要在 :doc:`fastNLP` 中查看即可。如果想了解各个子模块的具体作用,您可以在下面找到每个子模块的具体文档。


.. todo::
介绍core 的子模块的分工,好像必要性不大
""" """
from .batch import Batch from .batch import Batch
from .callback import Callback, GradientClipCallback, EarlyStopCallback, TensorboardCallback, LRScheduler, ControlC from .callback import Callback, GradientClipCallback, EarlyStopCallback, TensorboardCallback, LRScheduler, ControlC


+ 19
- 11
fastNLP/core/callback.py View File

@@ -1,5 +1,10 @@
r""" r"""
callback模块实现了 fastNLP 中的许多 callback 类,用于增强 :class:`~fastNLP.Trainer` 类,
callback模块实现了 fastNLP 中的许多 callback 类,用于增强 :class:`~fastNLP.Trainer` 类。

虽然Trainer本身已经集成了一些功能,但仍然不足以囊括训练过程中可能需要到的功能,
比如负采样,learning rate decay, Early Stop等。
为了解决这个问题fastNLP引入了callback的机制,Callback 是一种在Trainer训练过程中特定阶段会运行的函数集合。
关于Trainer的详细文档,请参见 :doc:`trainer 模块<fastNLP.core.trainer>`


我们将 :meth:`~fastNLP.Train.train` 这个函数内部分为以下的阶段,在对应阶段会触发相应的调用:: 我们将 :meth:`~fastNLP.Train.train` 这个函数内部分为以下的阶段,在对应阶段会触发相应的调用::


@@ -26,8 +31,6 @@ callback模块实现了 fastNLP 中的许多 callback 类,用于增强 :class:
callback.on_train_end() # 训练结束 callback.on_train_end() # 训练结束
callback.on_exception() # 这是一个特殊的步骤,在训练过程中遭遇exception会跳转到这里 callback.on_exception() # 这是一个特殊的步骤,在训练过程中遭遇exception会跳转到这里


关于Trainer的详细文档,请参见 :doc:`trainer 模块<fastNLP.core.trainer>`

如下面的例子所示,我们可以使用内置的 callback 类,或者继承 :class:`~fastNLP.core.callback.Callback` 如下面的例子所示,我们可以使用内置的 callback 类,或者继承 :class:`~fastNLP.core.callback.Callback`
定义自己的 callback 类:: 定义自己的 callback 类::
@@ -63,8 +66,9 @@ from ..io.model_io import ModelSaver, ModelLoader


try: try:
from tensorboardX import SummaryWriter from tensorboardX import SummaryWriter
tensorboardX_flag = True
except: except:
pass
tensorboardX_flag = False




class Callback(object): class Callback(object):
@@ -578,8 +582,11 @@ class TensorboardCallback(Callback):
path = os.path.join("./", 'tensorboard_logs_{}'.format(self.trainer.start_time)) path = os.path.join("./", 'tensorboard_logs_{}'.format(self.trainer.start_time))
else: else:
path = os.path.join(save_dir, 'tensorboard_logs_{}'.format(self.trainer.start_time)) path = os.path.join(save_dir, 'tensorboard_logs_{}'.format(self.trainer.start_time))
self._summary_writer = SummaryWriter(path)
if tensorboardX_flag:
self._summary_writer = SummaryWriter(path)
else:
self._summary_writer = None
def on_batch_begin(self, batch_x, batch_y, indices): def on_batch_begin(self, batch_x, batch_y, indices):
if "model" in self.options and self.graph_added is False: if "model" in self.options and self.graph_added is False:
# tesorboardX 这里有大bug,暂时没法画模型图 # tesorboardX 这里有大bug,暂时没法画模型图
@@ -591,10 +598,10 @@ class TensorboardCallback(Callback):
self.graph_added = True self.graph_added = True
def on_backward_begin(self, loss): def on_backward_begin(self, loss):
if "loss" in self.options:
if "loss" in self.options and self._summary_writer:
self._summary_writer.add_scalar("loss", loss.item(), global_step=self.trainer.step) self._summary_writer.add_scalar("loss", loss.item(), global_step=self.trainer.step)
if "model" in self.options:
if "model" in self.options and self._summary_writer:
for name, param in self.trainer.model.named_parameters(): for name, param in self.trainer.model.named_parameters():
if param.requires_grad: if param.requires_grad:
self._summary_writer.add_scalar(name + "_mean", param.mean(), global_step=self.trainer.step) self._summary_writer.add_scalar(name + "_mean", param.mean(), global_step=self.trainer.step)
@@ -603,15 +610,16 @@ class TensorboardCallback(Callback):
global_step=self.trainer.step) global_step=self.trainer.step)
def on_valid_end(self, eval_result, metric_key, optimizer, is_better_eval): def on_valid_end(self, eval_result, metric_key, optimizer, is_better_eval):
if "metric" in self.options:
if "metric" in self.options and self._summary_writer:
for name, metric in eval_result.items(): for name, metric in eval_result.items():
for metric_key, metric_val in metric.items(): for metric_key, metric_val in metric.items():
self._summary_writer.add_scalar("valid_{}_{}".format(name, metric_key), metric_val, self._summary_writer.add_scalar("valid_{}_{}".format(name, metric_key), metric_val,
global_step=self.trainer.step) global_step=self.trainer.step)
def on_train_end(self): def on_train_end(self):
self._summary_writer.close()
del self._summary_writer
if self._summary_writer:
self._summary_writer.close()
del self._summary_writer
def on_exception(self, exception): def on_exception(self, exception):
if hasattr(self, "_summary_writer"): if hasattr(self, "_summary_writer"):


+ 6
- 1
fastNLP/core/const.py View File

@@ -1,5 +1,10 @@
class Const: class Const:
"""fastNLP中field命名常量。
"""
fastNLP中field命名常量。
.. todo::
把下面这段改成表格
具体列表:: 具体列表::


INPUT 模型的序列输入 words(复数words1, words2) INPUT 模型的序列输入 words(复数words1, words2)


+ 24
- 27
fastNLP/core/dataset.py View File

@@ -58,7 +58,10 @@
2 DataSet与预处理 2 DataSet与预处理
常见的预处理有如下几种 常见的预处理有如下几种


2.1 从某个文本文件读取内容 # TODO 引用DataLoader
2.1 从某个文本文件读取内容 #

.. todo::
引用DataLoader


Example:: Example::


@@ -209,39 +212,33 @@
target和input,这种情况下,fastNLP默认不进行pad。另外,当某个field已经被设置为了target或者input后,之后append的 target和input,这种情况下,fastNLP默认不进行pad。另外,当某个field已经被设置为了target或者input后,之后append的
instance对应的field必须要和前面已有的内容一致,否则会报错。 instance对应的field必须要和前面已有的内容一致,否则会报错。


可以查看field的dtype

Example::
可以查看field的dtype::
from fastNLP import DataSet
from fastNLP import DataSet


d = DataSet({'a': [0, 1, 3], 'b':[[1.0, 2.0], [0.1, 0.2], [3]]})
d.set_input('a', 'b')
d.a.dtype
>> numpy.int64
d.b.dtype
>> numpy.float64
# 默认情况下'a'这个field将被转换为torch.LongTensor,但如果需要其为torch.FloatTensor可以手动修改dtype
d.a.dtype = float # 请确保该field的确可以全部转换为float。
d = DataSet({'a': [0, 1, 3], 'b':[[1.0, 2.0], [0.1, 0.2], [3]]})
d.set_input('a', 'b')
d.a.dtype
>> numpy.int64
d.b.dtype
>> numpy.float64
# 默认情况下'a'这个field将被转换为torch.LongTensor,但如果需要其为torch.FloatTensor可以手动修改dtype
d.a.dtype = float # 请确保该field的确可以全部转换为float。


如果某个field中出现了多种类型混合(比如一部分为str,一部分为int)的情况,fastNLP无法判断该field的类型,会报如下的 如果某个field中出现了多种类型混合(比如一部分为str,一部分为int)的情况,fastNLP无法判断该field的类型,会报如下的
错误:

Example::
错误::


from fastNLP import DataSet
d = DataSet({'data': [1, 'a']})
d.set_input('data')
>> RuntimeError: Mixed data types in Field data: [<class 'str'>, <class 'int'>]
from fastNLP import DataSet
d = DataSet({'data': [1, 'a']})
d.set_input('data')
>> RuntimeError: Mixed data types in Field data: [<class 'str'>, <class 'int'>]


可以通过设置以忽略对该field进行类型检查
可以通过设置以忽略对该field进行类型检查::


Example::

from fastNLP import DataSet
d = DataSet({'data': [1, 'a']})
d.set_ignore_type('data')
d.set_input('data')
from fastNLP import DataSet
d = DataSet({'data': [1, 'a']})
d.set_ignore_type('data')
d.set_input('data')


当某个field被设置为忽略type之后,fastNLP将不对其进行pad。 当某个field被设置为忽略type之后,fastNLP将不对其进行pad。




+ 1
- 2
fastNLP/core/losses.py View File

@@ -221,8 +221,7 @@ class CrossEntropyLoss(LossBase):
""" """
def __init__(self, pred=None, target=None, padding_idx=-100): def __init__(self, pred=None, target=None, padding_idx=-100):
# TODO 需要做一些检查,F.cross_entropy在计算时,如果pred是(16, 10 ,4), target的形状按道理应该是(16, 10), 但实际却需要
# TODO (16, 4)
# TODO 需要做一些检查,F.cross_entropy在计算时,如果pred是(16, 10 ,4), target的形状按道理应该是(16, 10), 但实际需要(16,4)
super(CrossEntropyLoss, self).__init__() super(CrossEntropyLoss, self).__init__()
self._init_param_map(pred=pred, target=target) self._init_param_map(pred=pred, target=target)
self.padding_idx = padding_idx self.padding_idx = padding_idx


+ 2
- 3
fastNLP/core/utils.py View File

@@ -35,9 +35,7 @@ def cache_results(_cache_fp, _refresh=False, _verbose=1):
""" """
别名::class:`fastNLP.cache_results` :class:`fastNLP.core.uitls.cache_results` 别名::class:`fastNLP.cache_results` :class:`fastNLP.core.uitls.cache_results`


cache_results是fastNLP中用于cache数据的装饰器。通过下面的例子看一下如何使用

Example::
cache_results是fastNLP中用于cache数据的装饰器。通过下面的例子看一下如何使用::


import time import time
import numpy as np import numpy as np
@@ -607,6 +605,7 @@ def seq_len_to_mask(seq_len):
转变 1-d seq_len到2-d mask. 转变 1-d seq_len到2-d mask.


Example:: Example::
>>> seq_len = torch.arange(2, 16) >>> seq_len = torch.arange(2, 16)
>>> mask = seq_len_to_mask(seq_len) >>> mask = seq_len_to_mask(seq_len)
>>> print(mask.size()) >>> print(mask.size())


+ 6
- 7
fastNLP/io/__init__.py View File

@@ -5,15 +5,10 @@


2. 用于读入数据的 :doc:`DataSetLoader <fastNLP.io.dataset_loader>` 类 2. 用于读入数据的 :doc:`DataSetLoader <fastNLP.io.dataset_loader>` 类


3. 用于保存和载入模型的类, 参考 :doc:`Model-IO <fastNLP.io.model_io>`
3. 用于保存和载入模型的类, 参考 :doc:`/fastNLP.io.model_io`


这些类的使用方法可以在对应module的文档下查看.
这些类的使用方法如下:
""" """
from .embed_loader import EmbedLoader
from .dataset_loader import DataSetLoader, CSVLoader, JsonLoader, ConllLoader, SNLILoader, SSTLoader, \
PeopleDailyCorpusLoader, Conll2003Loader
from .model_io import ModelLoader as ModelLoader, ModelSaver as ModelSaver

__all__ = [ __all__ = [
'EmbedLoader', 'EmbedLoader',
@@ -29,3 +24,7 @@ __all__ = [
'ModelLoader', 'ModelLoader',
'ModelSaver', 'ModelSaver',
] ]
from .embed_loader import EmbedLoader
from .dataset_loader import DataSetLoader, CSVLoader, JsonLoader, ConllLoader, SNLILoader, SSTLoader, \
PeopleDailyCorpusLoader, Conll2003Loader
from .model_io import ModelLoader as ModelLoader, ModelSaver as ModelSaver

+ 2
- 1
fastNLP/io/base_loader.py View File

@@ -47,7 +47,6 @@ class BaseLoader(object):




class DataLoaderRegister: class DataLoaderRegister:
# TODO 这个类使用在何处?
_readers = {} _readers = {}


@classmethod @classmethod
@@ -64,3 +63,5 @@ class DataLoaderRegister:
if read_fn_name in cls._readers: if read_fn_name in cls._readers:
return cls._readers[read_fn_name] return cls._readers[read_fn_name]
raise AttributeError('no read function: {}'.format(read_fn_name)) raise AttributeError('no read function: {}'.format(read_fn_name))
# TODO 这个类使用在何处?

+ 2
- 3
fastNLP/io/dataset_loader.py View File

@@ -1,8 +1,7 @@
""" """
dataset_loader模块实现了许多 DataSetLoader, 用于读取不同格式的数据, 并返回 `DataSet` , dataset_loader模块实现了许多 DataSetLoader, 用于读取不同格式的数据, 并返回 `DataSet` ,
得到的 :class:`~fastNLP.DataSet` 对象可以直接传入 :class:`~fastNLP.Trainer`, :class:`~fastNLP.Tester`, 用于模型的训练和测试

Example::
得到的 :class:`~fastNLP.DataSet` 对象可以直接传入 :class:`~fastNLP.Trainer`, :class:`~fastNLP.Tester`, 用于模型的训练和测试。
以SNLI数据集为例::


loader = SNLILoader() loader = SNLILoader()
train_ds = loader.load('path/to/train') train_ds = loader.load('path/to/train')


+ 1
- 6
fastNLP/io/embed_loader.py View File

@@ -1,8 +1,3 @@
"""
.. _embed-loader:

用于读取预训练的embedding, 读取结果可直接载入为模型参数
"""
import os import os


import numpy as np import numpy as np
@@ -16,7 +11,7 @@ class EmbedLoader(BaseLoader):
""" """
别名::class:`fastNLP.io.EmbedLoader` :class:`fastNLP.io.embed_loader.EmbedLoader` 别名::class:`fastNLP.io.EmbedLoader` :class:`fastNLP.io.embed_loader.EmbedLoader`


这个类用于从预训练的Embedding中load数据
用于读取预训练的embedding, 读取结果可直接载入为模型参数
""" """


def __init__(self): def __init__(self):


+ 7
- 2
fastNLP/models/__init__.py View File

@@ -1,8 +1,13 @@
""" """
使用 fastNLP 实现的一系列常见模型,具体有:
TODO 详细介绍的表格,与主页相对应
fastNLP 在 :mod:`~fastNLP.models` 模块中内置了如 :class:`~fastNLP.models.CNNText` 、
:class:`~fastNLP.models.SeqLabeling` 等完整的模型,以供用户直接使用。

.. todo::
这些模型的介绍(与主页一致)



""" """
__all__ = ["CNNText", "SeqLabeling", "ESIM", "STSeqLabel", "AdvSeqLabel", "STNLICls", "STSeqCls"]
from .base_model import BaseModel from .base_model import BaseModel
from .bert import BertForMultipleChoice, BertForQuestionAnswering, BertForSequenceClassification, \ from .bert import BertForMultipleChoice, BertForQuestionAnswering, BertForSequenceClassification, \
BertForTokenClassification BertForTokenClassification


+ 2
- 2
fastNLP/models/enas_trainer.py View File

@@ -78,7 +78,7 @@ class ENASTrainer(Trainer):
results['seconds'] = 0. results['seconds'] = 0.
return results return results
try: try:
if torch.cuda.is_available() and self.use_cuda:
if torch.cuda.is_available() and "cuda" in self.device:
self.model = self.model.cuda() self.model = self.model.cuda()
self._model_device = self.model.parameters().__next__().device self._model_device = self.model.parameters().__next__().device
self._mode(self.model, is_test=False) self._mode(self.model, is_test=False)
@@ -337,7 +337,7 @@ class ENASTrainer(Trainer):


# policy loss # policy loss
loss = -log_probs*utils.get_variable(adv, loss = -log_probs*utils.get_variable(adv,
self.use_cuda,
'cuda' in self.device,
requires_grad=False) requires_grad=False)


loss = loss.sum() # or loss.mean() loss = loss.sum() # or loss.mean()


+ 36
- 4
fastNLP/modules/__init__.py View File

@@ -1,10 +1,25 @@
""" """
modules 模块是 fastNLP 的重要组成部分,它实现了神经网络构建中常见的组件,
具体包括 TODO
大部分用于的 NLP 任务神经网络都可以看做由编码 :mod:`~fastNLP.modules.encoder` 、
聚合 :mod:`~fastNLP.modules.aggregator` 、解码 :mod:`~fastNLP.modules.decoder` 三种模块组成。


可以和 PyTorch 结合使用?TODO
.. image:: figures/text_classification.png


TODO __all__ 里面多暴露一些
:mod:`~fastNLP.modules` 中实现了 fastNLP 提供的诸多模块组件,可以帮助用户快速搭建自己所需的网络。
三种模块的功能和常见组件如下:

+-----------------------+-----------------------+-----------------------+
| module type | functionality | example |
+=======================+=======================+=======================+
| encoder | 将输入编码为具有具 | embedding, RNN, CNN, |
| | 有表示能力的向量 | transformer |
+-----------------------+-----------------------+-----------------------+
| aggregator | 从多个向量中聚合信息 | self-attention, |
| | | max-pooling |
+-----------------------+-----------------------+-----------------------+
| decoder | 将具有某种表示意义的 | MLP, CRF |
| | 向量解码为需要的输出 | |
| | 形式 | |
+-----------------------+-----------------------+-----------------------+


""" """
from . import aggregator from . import aggregator
@@ -16,3 +31,20 @@ from .dropout import TimestepDropout
from .encoder import * from .encoder import *
from .utils import get_embeddings from .utils import get_embeddings


__all__ = [
"LSTM",
"Embedding",
"ConvMaxpool",
"BertModel",
"MaxPool",
"MaxPoolWithMask",
"AvgPool",
"MultiHeadAttention",
"BiAttention",

"MLP",
"ConditionalRandomField",
"viterbi_decode",
"allowed_transitions",
]

+ 8
- 1
fastNLP/modules/aggregator/__init__.py View File

@@ -1,7 +1,14 @@
__all__ = ["MaxPool", "MaxPoolWithMask", "AvgPool", "MultiHeadAttention", "BiAttention"]
from .pooling import MaxPool from .pooling import MaxPool
from .pooling import MaxPoolWithMask from .pooling import MaxPoolWithMask
from .pooling import AvgPool from .pooling import AvgPool
from .pooling import MeanPoolWithMask from .pooling import MeanPoolWithMask


from .attention import MultiHeadAttention, BiAttention from .attention import MultiHeadAttention, BiAttention
__all__ = [
"MaxPool",
"MaxPoolWithMask",
"AvgPool",
"MultiHeadAttention",
"BiAttention"
]

+ 2
- 1
fastNLP/modules/aggregator/attention.py View File

@@ -12,7 +12,8 @@ from ..utils import initial_parameter


class DotAttention(nn.Module): class DotAttention(nn.Module):
""" """
TODO
.. todo::
补上文档
""" """
def __init__(self, key_size, value_size, dropout=0): def __init__(self, key_size, value_size, dropout=0):
super(DotAttention, self).__init__() super(DotAttention, self).__init__()


+ 7
- 1
fastNLP/modules/decoder/__init__.py View File

@@ -1,5 +1,11 @@
__all__ = ["MLP", "ConditionalRandomField", "viterbi_decode", "allowed_transitions"]
from .CRF import ConditionalRandomField from .CRF import ConditionalRandomField
from .MLP import MLP from .MLP import MLP
from .utils import viterbi_decode from .utils import viterbi_decode
from .CRF import allowed_transitions from .CRF import allowed_transitions

__all__ = [
"MLP",
"ConditionalRandomField",
"viterbi_decode",
"allowed_transitions"
]

+ 6
- 4
fastNLP/modules/encoder/__init__.py View File

@@ -3,7 +3,9 @@ from .embedding import Embedding
from .lstm import LSTM from .lstm import LSTM
from .bert import BertModel from .bert import BertModel


__all__ = ["LSTM",
"Embedding",
"ConvMaxpool",
"BertModel"]
__all__ = [
"LSTM",
"Embedding",
"ConvMaxpool",
"BertModel"
]

+ 4
- 1
fastNLP/modules/utils.py View File

@@ -70,7 +70,10 @@ def initial_parameter(net, initial_method=None):


def get_embeddings(init_embed): def get_embeddings(init_embed):
""" """
得到词嵌入 TODO
得到词嵌入
.. todo::
补上文档


:param init_embed: 单词词典, 可以是 tuple, 包括(num_embedings, embedding_dim), 即 :param init_embed: 单词词典, 可以是 tuple, 包括(num_embedings, embedding_dim), 即
embedding的大小和每个词的维度. 也可以传入 nn.Embedding 对象, embedding的大小和每个词的维度. 也可以传入 nn.Embedding 对象,


fastNLP/api/README.md → legacy/api/README.md View File


fastNLP/api/__init__.py → legacy/api/__init__.py View File


fastNLP/api/api.py → legacy/api/api.py View File

@@ -5,13 +5,13 @@ import torch
warnings.filterwarnings('ignore') warnings.filterwarnings('ignore')
import os import os


from ..core.dataset import DataSet
from fastNLP.core.dataset import DataSet
from .utils import load_url from .utils import load_url
from .processor import ModelProcessor from .processor import ModelProcessor
from ..io.dataset_loader import _cut_long_sentence, ConllLoader
from ..core.instance import Instance
from fastNLP.io.dataset_loader import _cut_long_sentence, ConllLoader
from fastNLP.core.instance import Instance
from ..api.pipeline import Pipeline from ..api.pipeline import Pipeline
from ..core.metrics import SpanFPreRecMetric
from fastNLP.core.metrics import SpanFPreRecMetric
from .processor import IndexerProcessor from .processor import IndexerProcessor


# TODO add pretrain urls # TODO add pretrain urls

fastNLP/api/converter.py → legacy/api/converter.py View File


fastNLP/api/examples.py → legacy/api/examples.py View File


fastNLP/api/pipeline.py → legacy/api/pipeline.py View File


fastNLP/api/processor.py → legacy/api/processor.py View File

@@ -3,10 +3,10 @@ from collections import defaultdict


import torch import torch


from ..core.batch import Batch
from ..core.dataset import DataSet
from ..core.sampler import SequentialSampler
from ..core.vocabulary import Vocabulary
from fastNLP.core.batch import Batch
from fastNLP.core.dataset import DataSet
from fastNLP.core.sampler import SequentialSampler
from fastNLP.core.vocabulary import Vocabulary




class Processor(object): class Processor(object):
@@ -232,7 +232,7 @@ class SeqLenProcessor(Processor):
return dataset return dataset




from ..core.utils import _build_args
from fastNLP.core.utils import _build_args




class ModelProcessor(Processor): class ModelProcessor(Processor):
@@ -257,10 +257,7 @@ class ModelProcessor(Processor):
data_iterator = Batch(dataset, batch_size=self.batch_size, sampler=SequentialSampler()) data_iterator = Batch(dataset, batch_size=self.batch_size, sampler=SequentialSampler())


batch_output = defaultdict(list) batch_output = defaultdict(list)
if hasattr(self.model, "predict"):
predict_func = self.model.predict
else:
predict_func = self.model.forward
predict_func = self.model.forward
with torch.no_grad(): with torch.no_grad():
for batch_x, _ in data_iterator: for batch_x, _ in data_iterator:
refined_batch_x = _build_args(predict_func, **batch_x) refined_batch_x = _build_args(predict_func, **batch_x)

fastNLP/api/utils.py → legacy/api/utils.py View File

@@ -22,7 +22,7 @@ except ImportError:
try: try:
from tqdm.auto import tqdm from tqdm.auto import tqdm
except: except:
from ..core.utils import _pseudo_tqdm as tqdm
from fastNLP.core.utils import _pseudo_tqdm as tqdm
# matches bfd8deac from resnet18-bfd8deac.pth # matches bfd8deac from resnet18-bfd8deac.pth
HASH_REGEX = re.compile(r'-([a-f0-9]*)\.') HASH_REGEX = re.compile(r'-([a-f0-9]*)\.')

fastNLP/automl/__init__.py → legacy/automl/__init__.py View File


fastNLP/automl/enas_controller.py → legacy/automl/enas_controller.py View File


fastNLP/automl/enas_model.py → legacy/automl/enas_model.py View File


fastNLP/automl/enas_trainer.py → legacy/automl/enas_trainer.py View File

@@ -11,15 +11,15 @@ import torch
try: try:
from tqdm.auto import tqdm from tqdm.auto import tqdm
except: except:
from ..core.utils import _pseudo_tqdm as tqdm
from fastNLP.core.utils import _pseudo_tqdm as tqdm


from ..core.batch import Batch
from ..core.callback import CallbackException
from ..core.dataset import DataSet
from ..core.utils import _move_dict_value_to_device
from fastNLP.core.batch import Batch
from fastNLP.core.callback import CallbackException
from fastNLP.core.dataset import DataSet
from fastNLP.core.utils import _move_dict_value_to_device
import fastNLP import fastNLP
from . import enas_utils as utils from . import enas_utils as utils
from ..core.utils import _build_args
from fastNLP.core.utils import _build_args


from torch.optim import Adam from torch.optim import Adam



fastNLP/automl/enas_utils.py → legacy/automl/enas_utils.py View File


fastNLP/component/__init__.py → legacy/component/__init__.py View File


fastNLP/component/bert_tokenizer.py → legacy/component/bert_tokenizer.py View File


+ 0
- 6
test/api/test_pipeline.py View File

@@ -1,6 +0,0 @@
import unittest


class TestPipeline(unittest.TestCase):
def test_case(self):
pass

+ 0
- 101
test/api/test_processor.py View File

@@ -1,101 +0,0 @@
import random
import unittest

import numpy as np

from fastNLP import Vocabulary, Instance
from fastNLP.api.processor import FullSpaceToHalfSpaceProcessor, PreAppendProcessor, SliceProcessor, Num2TagProcessor, \
IndexerProcessor, VocabProcessor, SeqLenProcessor, ModelProcessor, Index2WordProcessor, SetTargetProcessor, \
SetInputProcessor, VocabIndexerProcessor
from fastNLP.core.dataset import DataSet


class TestProcessor(unittest.TestCase):
def test_FullSpaceToHalfSpaceProcessor(self):
ds = DataSet({"word": ["00, u1, u), (u2, u2"]})
proc = FullSpaceToHalfSpaceProcessor("word")
ds = proc(ds)
self.assertEqual(ds.field_arrays["word"].content, ["00, u1, u), (u2, u2"])

def test_PreAppendProcessor(self):
ds = DataSet({"word": [["1234", "3456"], ["8789", "3464"]]})
proc = PreAppendProcessor(data="abc", field_name="word")
ds = proc(ds)
self.assertEqual(ds.field_arrays["word"].content, [["abc", "1234", "3456"], ["abc", "8789", "3464"]])

def test_SliceProcessor(self):
ds = DataSet({"xx": [[random.randint(0, 10) for _ in range(30)]] * 40})
proc = SliceProcessor(10, 20, 2, "xx", new_added_field_name="yy")
ds = proc(ds)
self.assertEqual(len(ds.field_arrays["yy"].content[0]), 5)

def test_Num2TagProcessor(self):
ds = DataSet({"num": [["99.9982", "2134.0"], ["0.002", "234"]]})
proc = Num2TagProcessor("<num>", "num")
ds = proc(ds)
for data in ds.field_arrays["num"].content:
for d in data:
self.assertEqual(d, "<num>")

def test_VocabProcessor_and_IndexerProcessor(self):
ds = DataSet({"xx": [[str(random.randint(0, 10)) for _ in range(30)]] * 40})
vocab_proc = VocabProcessor("xx")
vocab_proc(ds)
vocab = vocab_proc.vocab
self.assertTrue(isinstance(vocab, Vocabulary))
self.assertTrue(len(vocab) > 5)

proc = IndexerProcessor(vocab, "xx", "yy")
ds = proc(ds)
for data in ds.field_arrays["yy"].content[0]:
self.assertTrue(isinstance(data, int))

def test_SeqLenProcessor(self):
ds = DataSet({"xx": [[str(random.randint(0, 10)) for _ in range(30)]] * 10})
proc = SeqLenProcessor("xx", "len")
ds = proc(ds)
for data in ds.field_arrays["len"].content:
self.assertEqual(data, 30)

def test_ModelProcessor(self):
from fastNLP.models.cnn_text_classification import CNNText
model = CNNText((100, 100), 5)
ins_list = []
for _ in range(64):
seq_len = np.random.randint(5, 30)
ins_list.append(Instance(word_seq=[np.random.randint(0, 100) for _ in range(seq_len)], seq_lens=seq_len))
data_set = DataSet(ins_list)
data_set.set_input("word_seq", "seq_lens")
proc = ModelProcessor(model)
data_set = proc(data_set)
self.assertTrue("pred" in data_set)

def test_Index2WordProcessor(self):
vocab = Vocabulary()
vocab.add_word_lst(["a", "b", "c", "d", "e"])
proc = Index2WordProcessor(vocab, "tag_id", "tag")
data_set = DataSet([Instance(tag_id=[np.random.randint(0, 7) for _ in range(32)])])
data_set = proc(data_set)
self.assertTrue("tag" in data_set)

def test_SetTargetProcessor(self):
proc = SetTargetProcessor("a", "b", "c")
data_set = DataSet({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]})
data_set = proc(data_set)
self.assertTrue(data_set["a"].is_target)
self.assertTrue(data_set["b"].is_target)
self.assertTrue(data_set["c"].is_target)

def test_SetInputProcessor(self):
proc = SetInputProcessor("a", "b", "c")
data_set = DataSet({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]})
data_set = proc(data_set)
self.assertTrue(data_set["a"].is_input)
self.assertTrue(data_set["b"].is_input)
self.assertTrue(data_set["c"].is_input)

def test_VocabIndexerProcessor(self):
proc = VocabIndexerProcessor("word_seq", "word_ids")
data_set = DataSet([Instance(word_seq=["a", "b", "c", "d", "e"])])
data_set = proc(data_set)
self.assertTrue("word_ids" in data_set)

+ 0
- 111
test/automl/test_enas.py View File

@@ -1,111 +0,0 @@
import unittest

from fastNLP import DataSet
from fastNLP import Instance
from fastNLP import Vocabulary
from fastNLP.core.losses import CrossEntropyLoss
from fastNLP.core.metrics import AccuracyMetric


class TestENAS(unittest.TestCase):
def testENAS(self):
# 从csv读取数据到DataSet
sample_path = "tutorials/sample_data/tutorial_sample_dataset.csv"
dataset = DataSet.read_csv(sample_path, headers=('raw_sentence', 'label'),
sep='\t')
print(len(dataset))
print(dataset[0])
print(dataset[-3])

dataset.append(Instance(raw_sentence='fake data', label='0'))
# 将所有数字转为小写
dataset.apply(lambda x: x['raw_sentence'].lower(), new_field_name='raw_sentence')
# label转int
dataset.apply(lambda x: int(x['label']), new_field_name='label')

# 使用空格分割句子
def split_sent(ins):
return ins['raw_sentence'].split()

dataset.apply(split_sent, new_field_name='words')

# 增加长度信息
dataset.apply(lambda x: len(x['words']), new_field_name='seq_len')
print(len(dataset))
print(dataset[0])

# DataSet.drop(func)筛除数据
dataset.drop(lambda x: x['seq_len'] <= 3, inplace=True)
print(len(dataset))

# 设置DataSet中,哪些field要转为tensor
# set target,loss或evaluate中的golden,计算loss,模型评估时使用
dataset.set_target("label")
# set input,模型forward时使用
dataset.set_input("words", "seq_len")

# 分出测试集、训练集
test_data, train_data = dataset.split(0.5)
print(len(test_data))
print(len(train_data))

# 构建词表, Vocabulary.add(word)
vocab = Vocabulary(min_freq=2)
train_data.apply(lambda x: [vocab.add(word) for word in x['words']])
vocab.build_vocab()

# index句子, Vocabulary.to_index(word)
train_data.apply(lambda x: [vocab.to_index(word) for word in x['words']], new_field_name='words')
test_data.apply(lambda x: [vocab.to_index(word) for word in x['words']], new_field_name='words')
print(test_data[0])

# 如果你们需要做强化学习或者GAN之类的项目,你们也可以使用这些数据预处理的工具
from fastNLP.core.batch import Batch
from fastNLP.core.sampler import RandomSampler

batch_iterator = Batch(dataset=train_data, batch_size=2, sampler=RandomSampler())
for batch_x, batch_y in batch_iterator:
print("batch_x has: ", batch_x)
print("batch_y has: ", batch_y)
break

from fastNLP.automl.enas_model import ENASModel
from fastNLP.automl.enas_controller import Controller
model = ENASModel(embed_num=len(vocab), num_classes=5)
controller = Controller()

from fastNLP.automl.enas_trainer import ENASTrainer

# 更改DataSet中对应field的名称,要以模型的forward等参数名一致
train_data.rename_field('words', 'word_seq') # input field 与 forward 参数一致
train_data.rename_field('label', 'label_seq')
test_data.rename_field('words', 'word_seq')
test_data.rename_field('label', 'label_seq')

loss = CrossEntropyLoss(pred="output", target="label_seq")
metric = AccuracyMetric(pred="predict", target="label_seq")

trainer = ENASTrainer(model=model, controller=controller, train_data=train_data, dev_data=test_data,
loss=CrossEntropyLoss(pred="output", target="label_seq"),
metrics=AccuracyMetric(pred="predict", target="label_seq"),
check_code_level=-1,
save_path=None,
batch_size=32,
print_every=1,
n_epochs=3,
final_epochs=1)
trainer.train()
print('Train finished!')

# 调用Tester在test_data上评价效果
from fastNLP import Tester

tester = Tester(data=test_data, model=model, metrics=AccuracyMetric(pred="predict", target="label_seq"),
batch_size=4)

acc = tester.test()
print(acc)


if __name__ == '__main__':
unittest.main()

Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save