From 7413276997731b3e816444c1db3caf624b743405 Mon Sep 17 00:00:00 2001
From: zide05 <845465009@qq.com>
Date: Sun, 22 Sep 2019 09:47:33 +0800
Subject: [PATCH] modify pipe documents

---
 fastNLP/io/__init__.py            |   7 ++
 fastNLP/io/pipe/__init__.py       |   4 +-
 fastNLP/io/pipe/classification.py | 162 ++++++++++++++++++++++++------
 fastNLP/io/pipe/conll.py          | 103 +++++++++++++++----
 fastNLP/io/pipe/coreference.py    |  30 ++++--
 fastNLP/io/pipe/cws.py            |  19 +++-
 fastNLP/io/pipe/matching.py       |  51 ++++++++--
 7 files changed, 303 insertions(+), 73 deletions(-)

diff --git a/fastNLP/io/__init__.py b/fastNLP/io/__init__.py
index c8b3dfaa..63fde69a 100644
--- a/fastNLP/io/__init__.py
+++ b/fastNLP/io/__init__.py
@@ -25,6 +25,8 @@ __all__ = [
     'SSTLoader',
     'SST2Loader',
     "ChnSentiCorpLoader",
+    "THUCNewsLoader",
+    "WeiboSenti100kLoader",
 
     'ConllLoader',
     'Conll2003Loader',
@@ -45,6 +47,9 @@ __all__ = [
     "SNLILoader",
     "QNLILoader",
     "RTELoader",
+    "XNLILoader",
+    "BQCorpusLoader",
+    "LCQMCLoader",
 
     "Pipe",
 
@@ -54,6 +59,8 @@ __all__ = [
     "SST2Pipe",
     "IMDBPipe",
     "ChnSentiCorpPipe",
+    "THUCNewsPipe",
+    "WeiboSenti100kPipe",
 
     "Conll2003Pipe",
     "Conll2003NERPipe",
diff --git a/fastNLP/io/pipe/__init__.py b/fastNLP/io/pipe/__init__.py
index 0ddb1f2d..212f9e66 100644
--- a/fastNLP/io/pipe/__init__.py
+++ b/fastNLP/io/pipe/__init__.py
@@ -18,6 +18,8 @@ __all__ = [
     "SST2Pipe",
     "IMDBPipe",
     "ChnSentiCorpPipe",
+    "THUCNewsPipe",
+    "WeiboSenti100kPipe",
 
     "Conll2003NERPipe",
     "OntoNotesNERPipe",
@@ -42,7 +44,7 @@ __all__ = [
     "CoReferencePipe"
 ]
 
-from .classification import YelpFullPipe, YelpPolarityPipe, SSTPipe, SST2Pipe, IMDBPipe, ChnSentiCorpPipe
+from .classification import YelpFullPipe, YelpPolarityPipe, SSTPipe, SST2Pipe, IMDBPipe, ChnSentiCorpPipe, THUCNewsPipe, WeiboSenti100kPipe
 from .conll import Conll2003NERPipe, OntoNotesNERPipe, MsraNERPipe, WeiboNERPipe, PeopleDailyPipe
 from .matching import MatchingBertPipe, RTEBertPipe, SNLIBertPipe, QuoraBertPipe, QNLIBertPipe, MNLIBertPipe, \
     MatchingPipe, RTEPipe, SNLIPipe, QuoraPipe, QNLIPipe, MNLIPipe
diff --git a/fastNLP/io/pipe/classification.py b/fastNLP/io/pipe/classification.py
index 409cfe53..1c44cc23 100644
--- a/fastNLP/io/pipe/classification.py
+++ b/fastNLP/io/pipe/classification.py
@@ -97,11 +97,22 @@ class YelpFullPipe(_CLSPipe):
     处理YelpFull的数据, 处理之后DataSet中的内容如下
 
     .. csv-table:: 下面是使用YelpFullPipe处理后的DataSet所具备的field
-        :header: "raw_words", "words", "target", "seq_len"
+        :header: "raw_words", "target", "words",  "seq_len"
+
+        "I got 'new' tires from them and within...", 0 ,"[7, 110, 22, 107, 22, 499, 59, 140, 3,...]", 160
+        " Don't waste your time.  We had two dif... ", 0, "[277, 17, 278, 38, 30, 112, 24, 85, 27...", 40
+        "...", ., "[...]", .
 
-        "It 's a ...", "[4, 2, 10, ...]", 0, 10
-        "Offers that ...", "[20, 40, ...]", 1, 21
-        "...", "[...]", ., .
+    dataset的print_field_meta()函数输出的各个field的被设置成input和target的情况为::
+
+        +-------------+-----------+--------+-------+---------+
+        | field_names | raw_words | target | words | seq_len |
+        +-------------+-----------+--------+-------+---------+
+        |   is_input  |   False   | False  |  True |   True  |
+        |  is_target  |   False   |  True  | False |  False  |
+        | ignore_type |           | False  | False |  False  |
+        |  pad_value  |           |   0    |   0   |    0    |
+        +-------------+-----------+--------+-------+---------+
 
     """
     
@@ -193,11 +204,22 @@ class YelpPolarityPipe(_CLSPipe):
     处理YelpPolarity的数据, 处理之后DataSet中的内容如下
 
     .. csv-table:: 下面是使用YelpFullPipe处理后的DataSet所具备的field
-        :header: "raw_words", "words", "target", "seq_len"
+        :header: "raw_words", "target", "words", "seq_len"
 
-        "It 's a ...", "[4, 2, 10, ...]", 0, 10
-        "Offers that ...", "[20, 40, ...]", 1, 21
-        "...", "[...]", ., .
+        "I got 'new' tires from them and within...", 0 ,"[7, 110, 22, 107, 22, 499, 59, 140, 3,...]", 160
+        " Don't waste your time.  We had two dif... ", 0, "[277, 17, 278, 38, 30, 112, 24, 85, 27...", 40
+        "...", ., "[...]", .
+
+    dataset的print_field_meta()函数输出的各个field的被设置成input和target的情况为::
+
+        +-------------+-----------+--------+-------+---------+
+        | field_names | raw_words | target | words | seq_len |
+        +-------------+-----------+--------+-------+---------+
+        |   is_input  |   False   | False  |  True |   True  |
+        |  is_target  |   False   |  True  | False |  False  |
+        | ignore_type |           | False  | False |  False  |
+        |  pad_value  |           |   0    |   0   |    0    |
+        +-------------+-----------+--------+-------+---------+
 
     """
     
@@ -211,6 +233,19 @@ class YelpPolarityPipe(_CLSPipe):
         self.lower = lower
     
     def process(self, data_bundle):
+        """
+        传入的DataSet应该具备如下的结构
+
+        .. csv-table::
+            :header: "raw_words", "target"
+
+            "I got 'new' tires from them and... ", "1"
+            "Don't waste your time.  We had two...", "1"
+            "...", "..."
+
+        :param data_bundle:
+        :return:
+        """
         # 复制一列words
         data_bundle = _add_words_field(data_bundle, lower=self.lower)
         
@@ -244,9 +279,20 @@ class SSTPipe(_CLSPipe):
     .. csv-table:: 下面是使用SSTPipe处理后的DataSet所具备的field
         :header: "raw_words", "words", "target", "seq_len"
 
-        "It 's a ...", "[4, 2, 10, ...]", 0, 16
-        "Offers that ...", "[20, 40, ...]", 1, 18
-        "...", "[...]", ., .
+        "It 's a lovely film with lovely perfor...", "[187, 6, 5, 132, 120, 70, 132, 188, 25...", 1, 13
+        "No one goes unindicted here , which is...", "[191, 126, 192, 193, 194, 4, 195, 17, ...", 0, 13
+        "...", "[...]", ., .
+
+    dataset的print_field_meta()函数输出的各个field的被设置成input和target的情况为::
+
+        +-------------+-----------+--------+-------+---------+
+        | field_names | raw_words | target | words | seq_len |
+        +-------------+-----------+--------+-------+---------+
+        |   is_input  |   False   | False  |  True |   True  |
+        |  is_target  |   False   |  True  | False |  False  |
+        | ignore_type |           | False  | False |  False  |
+        |  pad_value  |           |   0    |   0   |    0    |
+        +-------------+-----------+--------+-------+---------+
 
     """
     
@@ -278,11 +324,11 @@ class SSTPipe(_CLSPipe):
         """
         对DataBundle中的数据进行预处理。输入的DataSet应该至少拥有raw_words这一列，且内容类似与
 
-        .. csv-table::
+        .. csv-table:: 下面是使用SSTLoader读取的DataSet所具备的field
             :header: "raw_words"
 
-            "(3 (2 It) (4 (4 (2 's) (4 (3 (2 a)..."
-            "(4 (4 (2 Offers) (3 (3 (2 that) (3 (3 rare)..."
+            "(2 (3 (3 Effective) (2 but)) (1 (1 too-tepid)..."
+            "(3 (3 (2 If) (3 (2 you) (3 (2 sometimes) ..."
             "..."
 
         :param ~fastNLP.io.DataBundle data_bundle: 需要处理的DataBundle对象
@@ -335,12 +381,23 @@ class SST2Pipe(_CLSPipe):
     加载SST2的数据, 处理完成之后DataSet将拥有以下的field
 
     .. csv-table::
-       :header: "raw_words", "words", "target", "seq_len"
+       :header: "raw_words", "target", "words", "seq_len"
 
-       "it 's a charming and... ", "[3, 4, 5, 6, 7,...]", 1, 43
-       "unflinchingly bleak and...", "[10, 11, 7,...]", 1, 21
+       "it 's a charming and often affecting j... ", 1, "[19, 9, 6, 111, 5, 112, 113, 114, 3]", 9
+       "unflinchingly bleak and desperate", 0, "[115, 116, 5, 117]", 4
        "...", "...", ., .
 
+    dataset的print_field_meta()函数输出的各个field的被设置成input和target的情况为::
+
+        +-------------+-----------+--------+-------+---------+
+        | field_names | raw_words | target | words | seq_len |
+        +-------------+-----------+--------+-------+---------+
+        |   is_input  |   False   | False  |  True |   True  |
+        |  is_target  |   False   |  True  | False |  False  |
+        | ignore_type |           | False  | False |  False  |
+        |  pad_value  |           |   0    |   0   |    0    |
+        +-------------+-----------+--------+-------+---------+
+
     """
     
     def __init__(self, lower=False, tokenizer='spacy'):
@@ -357,11 +414,11 @@ class SST2Pipe(_CLSPipe):
         可以处理的DataSet应该具备如下的结构
 
         .. csv-table::
-           :header: "raw_words", "target"
+            :header: "raw_words", "target"
 
-           "it 's a charming and... ", 1
-           "unflinchingly bleak and...", 1
-           "...", "..."
+            "it 's a charming and often affecting...", "1"
+            "unflinchingly bleak and...", "0"
+            "...", "..."
 
         :param data_bundle:
         :return:
@@ -420,15 +477,26 @@ class IMDBPipe(_CLSPipe):
     经过本Pipe处理后DataSet将如下
 
     .. csv-table:: 输出DataSet的field
-       :header: "raw_words", "words", "target", "seq_len"
+       :header: "raw_words", "target", "words", "seq_len"
 
-       "Bromwell High is a cartoon ... ", "[3, 5, 6, 9, ...]", 0, 20
-       "Story of a man who has ...", "[20, 43, 9, 10, ...]", 1, 31
-       "...", "[...]", ., .
+       "Bromwell High is a cartoon ... ", 0, "[3, 5, 6, 9, ...]", 20
+       "Story of a man who has ...", 1, "[20, 43, 9, 10, ...]", 31
+       "...", ., "[...]", .
 
     其中raw_words为str类型，是原文; words是转换为index的输入; target是转换为index的目标值;
     words列被设置为input; target列被设置为target。
 
+    dataset的print_field_meta()函数输出的各个field的被设置成input和target的情况为::
+
+        +-------------+-----------+--------+-------+---------+
+        | field_names | raw_words | target | words | seq_len |
+        +-------------+-----------+--------+-------+---------+
+        |   is_input  |   False   | False  |  True |   True  |
+        |  is_target  |   False   |  True  | False |  False  |
+        | ignore_type |           | False  | False |  False  |
+        |  pad_value  |           |   0    |   0   |    0    |
+        +-------------+-----------+--------+-------+---------+
+
     """
     
     def __init__(self, lower: bool = False, tokenizer: str = 'spacy'):
@@ -493,13 +561,23 @@ class ChnSentiCorpPipe(Pipe):
     处理之后的DataSet有以下的结构
 
     .. csv-table::
-        :header: "raw_chars", "chars", "target", "seq_len"
+        :header: "raw_chars", "target", "chars", "seq_len"
 
-        "這間酒店環境和服務態度亦算不錯,但房間空間太小~~", "[2, 3, 4, 5, ...]", 1, 31
-        "<荐书> 推荐所有喜欢<红楼>...", "[10, 21, ....]", 1, 25
+        "這間酒店環境和服務態度亦算不錯,但房間空間太小~~", 1, "[2, 3, 4, 5, ...]", 31
+        "<荐书> 推荐所有喜欢<红楼>...", 1, "[10, 21, ....]", 25
         "..."
 
     其中chars, seq_len是input，target是target
+    dataset的print_field_meta()函数输出的各个field的被设置成input和target的情况为::
+
+        +-------------+-----------+--------+-------+---------+
+        | field_names | raw_chars | target | chars | seq_len |
+        +-------------+-----------+--------+-------+---------+
+        |   is_input  |   False   |  True  |  True |   True  |
+        |  is_target  |   False   |  True  | False |  False  |
+        | ignore_type |           | False  | False |  False  |
+        |  pad_value  |           |   0    |   0   |    0    |
+        +-------------+-----------+--------+-------+---------+
 
     """
     def __init__(self, bigrams=False, trigrams=False):
@@ -590,12 +668,22 @@ class THUCNewsPipe(_CLSPipe):
     处理之后的DataSet有以下的结构
 
     .. csv-table::
-        :header: "raw_chars", "chars", "target", "seq_len"
+        :header: "raw_chars", "target", "chars", "seq_len"
 
-        "马晓旭意外受伤让国奥警惕 无奈大雨格外青睐殷家军记者傅亚雨沈阳报道...", "[409, 1197, 2146, 213, ...]", 0, 746
+        "马晓旭意外受伤让国奥警惕 无奈大雨格外青睐殷家军记者傅亚雨沈阳报道...", 0, "[409, 1197, 2146, 213, ...]", 746
         "..."
 
     其中chars, seq_len是input，target是target
+    dataset的print_field_meta()函数输出的各个field的被设置成input和target的情况为::
+
+        +-------------+-----------+--------+-------+---------+
+        | field_names | raw_chars | target | chars | seq_len |
+        +-------------+-----------+--------+-------+---------+
+        |   is_input  |   False   |  True  |  True |   True  |
+        |  is_target  |   False   |  True  | False |  False  |
+        | ignore_type |           | False  | False |  False  |
+        |  pad_value  |           |   0    |   0   |    0    |
+        +-------------+-----------+--------+-------+---------+
 
     :param bool bigrams: 是否增加一列bigrams. bigrams的构成是['复', '旦', '大', '学', ...]->["复旦", "旦大", ...]。如果
         设置为True，返回的DataSet将有一列名为bigrams, 且已经转换为了index并设置为input，对应的vocab可以通过
@@ -691,12 +779,22 @@ class WeiboSenti100kPipe(_CLSPipe):
     处理之后的DataSet有以下的结构
 
     .. csv-table::
-        :header: "raw_chars", "chars", "target", "seq_len"
+        :header: "raw_chars", "target", "chars", "seq_len"
 
-        "六一出生的？好讽刺…… //@祭春姬:他爸爸是外星人吧 //@面孔小高:现在的孩子都怎么了 [怒][怒][怒]", "[0, 690, 18, ...]", 0, 56
+        "六一出生的？好讽刺…… //@祭春姬:他爸爸是外星人吧 //@面孔小高:现在的孩子都怎么了 [怒][怒][怒]", 0, "[0, 690, 18, ...]", 56
         "..."
 
     其中chars, seq_len是input，target是target
+    dataset的print_field_meta()函数输出的各个field的被设置成input和target的情况为::
+
+        +-------------+-----------+--------+-------+---------+
+        | field_names | raw_chars | target | chars | seq_len |
+        +-------------+-----------+--------+-------+---------+
+        |   is_input  |   False   |  True  |  True |   True  |
+        |  is_target  |   False   |  True  | False |  False  |
+        | ignore_type |           | False  | False |  False  |
+        |  pad_value  |           |   0    |   0   |    0    |
+        +-------------+-----------+--------+-------+---------+
 
     :param bool bigrams: 是否增加一列bigrams. bigrams的构成是['复', '旦', '大', '学', ...]->["复旦", "旦大", ...]。如果
         设置为True，返回的DataSet将有一列名为bigrams, 且已经转换为了index并设置为input，对应的vocab可以通过
diff --git a/fastNLP/io/pipe/conll.py b/fastNLP/io/pipe/conll.py
index 70af5acb..918cff9f 100644
--- a/fastNLP/io/pipe/conll.py
+++ b/fastNLP/io/pipe/conll.py
@@ -87,15 +87,26 @@ class Conll2003NERPipe(_NERPipe):
     经过该Pipe过后，DataSet中的内容如下所示
 
     .. csv-table:: Following is a demo layout of DataSet returned by Conll2003Loader
-       :header: "raw_words", "words", "target", "seq_len"
+       :header: "raw_words", "target", "words", "seq_len"
 
-       "[Nadim, Ladki]", "[2, 3]", "[1, 2]", 2
-       "[AL-AIN, United, Arab, ...]", "[4, 5, 6,...]", "[3, 4,...]", 6
+       "[Nadim, Ladki]", "[1, 2]", "[2, 3]", 2
+       "[AL-AIN, United, Arab, ...]", "[3, 4,...]", "[4, 5, 6,...]", 6
        "[...]", "[...]", "[...]", .
 
     raw_words列为List[str], 是未转换的原始数据; words列为List[int]，是转换为index的输入数据; target列是List[int]，是转换为index的
     target。返回的DataSet中被设置为input有words, target, seq_len; 设置为target有target。
 
+    dataset的print_field_meta()函数输出的各个field的被设置成input和target的情况为::
+
+        +-------------+-----------+--------+-------+---------+
+        | field_names | raw_words | target | words | seq_len |
+        +-------------+-----------+--------+-------+---------+
+        |   is_input  |   False   |  True  |  True |   True  |
+        |  is_target  |   False   |  True  | False |   True  |
+        | ignore_type |           | False  | False |  False  |
+        |  pad_value  |           |   0    |   0   |    0    |
+        +-------------+-----------+--------+-------+---------+
+
     """
     
     def process_from_file(self, paths) -> DataBundle:
@@ -112,17 +123,28 @@ class Conll2003NERPipe(_NERPipe):
 
 
 class Conll2003Pipe(Pipe):
-    r"""
+    """
     经过该Pipe后，DataSet中的内容如下
 
     .. csv-table::
-       :header: "raw_words" , "words", "pos", "chunk", "ner", "seq_len"
+       :header: "raw_words" , "pos", "chunk", "ner", "words", "seq_len"
 
-       "[Nadim, Ladki]", "[2, 3]", "[0, 0]", "[1, 2]", "[1, 2]", 2
-       "[AL-AIN, United, Arab, ...]", "[4, 5, 6,...]", "[1, 2...]", "[3, 4...]", "[3, 4...]", 6
+       "[Nadim, Ladki]", "[0, 0]", "[1, 2]", "[1, 2]", "[2, 3]", 2
+       "[AL-AIN, United, Arab, ...]", "[1, 2...]", "[3, 4...]", "[3, 4...]", "[4, 5, 6,...]", 6
        "[...]", "[...]", "[...]", "[...]", "[...]", .
 
     其中words, seq_len是input; pos, chunk, ner, seq_len是target
+    dataset的print_field_meta()函数输出的各个field的被设置成input和target的情况为::
+
+        +-------------+-----------+-------+-------+-------+-------+---------+
+        | field_names | raw_words |  pos  | chunk |  ner  | words | seq_len |
+        +-------------+-----------+-------+-------+-------+-------+---------+
+        |   is_input  |   False   | False | False | False |  True |   True  |
+        |  is_target  |   False   |  True |  True |  True | False |   True  |
+        | ignore_type |           | False | False | False | False |  False  |
+        |  pad_value  |           |   0   |   0   |   0   |   0   |    0    |
+        +-------------+-----------+-------+-------+-------+-------+---------+
+
 
     """
     def __init__(self, chunk_encoding_type='bioes', ner_encoding_type='bioes', lower: bool = False):
@@ -202,15 +224,26 @@ class OntoNotesNERPipe(_NERPipe):
     处理OntoNotes的NER数据，处理之后DataSet中的field情况为
 
     .. csv-table::
-       :header: "raw_words", "words", "target", "seq_len"
+       :header: "raw_words", "target", "words", "seq_len"
 
-       "[Nadim, Ladki]", "[2, 3]", "[1, 2]", 2
-       "[AL-AIN, United, Arab, ...]", "[4, 5, 6,...]", "[3, 4]", 6
+       "[Nadim, Ladki]", "[1, 2]", "[2, 3]", 2
+       "[AL-AIN, United, Arab, ...]", "[3, 4]", "[4, 5, 6,...]", 6
        "[...]", "[...]", "[...]", .
 
     raw_words列为List[str], 是未转换的原始数据; words列为List[int]，是转换为index的输入数据; target列是List[int]，是转换为index的
     target。返回的DataSet中被设置为input有words, target, seq_len; 设置为target有target。
 
+    dataset的print_field_meta()函数输出的各个field的被设置成input和target的情况为::
+
+        +-------------+-----------+--------+-------+---------+
+        | field_names | raw_words | target | words | seq_len |
+        +-------------+-----------+--------+-------+---------+
+        |   is_input  |   False   |  True  |  True |   True  |
+        |  is_target  |   False   |  True  | False |   True  |
+        | ignore_type |           | False  | False |  False  |
+        |  pad_value  |           |   0    |   0   |    0    |
+        +-------------+-----------+--------+-------+---------+
+
     """
     
     def process_from_file(self, paths):
@@ -306,15 +339,26 @@ class MsraNERPipe(_CNNERPipe):
     处理MSRA-NER的数据，处理之后的DataSet的field情况为
 
     .. csv-table::
-       :header: "raw_chars", "chars", "target", "seq_len"
+       :header: "raw_chars", "target", "chars", "seq_len"
 
-       "[相, 比, 之, 下,...]", "[2, 3, 4, 5, ...]", "[0, 0, 0, 0, ...]", 11
-       "[青, 岛, 海, 牛, 队, 和, ...]", "[10, 21, ....]", "[1, 2, 3, ...]", 21
+       "[相, 比, 之, 下,...]", "[0, 0, 0, 0, ...]", "[2, 3, 4, 5, ...]", 11
+       "[青, 岛, 海, 牛, 队, 和, ...]", "[1, 2, 3, ...]", "[10, 21, ....]", 21
        "[...]", "[...]", "[...]", .
 
     raw_chars列为List[str], 是未转换的原始数据; chars列为List[int]，是转换为index的输入数据; target列是List[int]，是转换为index的
     target。返回的DataSet中被设置为input有chars, target, seq_len; 设置为target有target。
 
+    dataset的print_field_meta()函数输出的各个field的被设置成input和target的情况为::
+
+        +-------------+-----------+--------+-------+---------+
+        | field_names | raw_chars | target | chars | seq_len |
+        +-------------+-----------+--------+-------+---------+
+        |   is_input  |   False   |  True  |  True |   True  |
+        |  is_target  |   False   |  True  | False |   True  |
+        | ignore_type |           | False  | False |  False  |
+        |  pad_value  |           |   0    |   0   |    0    |
+        +-------------+-----------+--------+-------+---------+
+
     """
     
     def process_from_file(self, paths=None) -> DataBundle:
@@ -327,14 +371,26 @@ class PeopleDailyPipe(_CNNERPipe):
     处理people daily的ner的数据，处理之后的DataSet的field情况为
 
     .. csv-table::
-       :header: "raw_chars", "chars", "target", "seq_len"
+       :header: "raw_chars", "target", "chars", "seq_len"
 
-       "[相, 比, 之, 下,...]", "[2, 3, 4, 5, ...]", "[0, 0, 0, 0, ...]", 11
-       "[青, 岛, 海, 牛, 队, 和, ...]", "[10, 21, ....]", "[1, 2, 3, ...]", 21
+       "[相, 比, 之, 下,...]", "[0, 0, 0, 0, ...]", "[2, 3, 4, 5, ...]", 11
+       "[青, 岛, 海, 牛, 队, 和, ...]", "[1, 2, 3, ...]", "[10, 21, ....]", 21
        "[...]", "[...]", "[...]", .
 
     raw_chars列为List[str], 是未转换的原始数据; chars列为List[int]，是转换为index的输入数据; target列是List[int]，是转换为index的
     target。返回的DataSet中被设置为input有chars, target, seq_len; 设置为target有target。
+
+    dataset的print_field_meta()函数输出的各个field的被设置成input和target的情况为::
+
+        +-------------+-----------+--------+-------+---------+
+        | field_names | raw_chars | target | chars | seq_len |
+        +-------------+-----------+--------+-------+---------+
+        |   is_input  |   False   |  True  |  True |   True  |
+        |  is_target  |   False   |  True  | False |   True  |
+        | ignore_type |           | False  | False |  False  |
+        |  pad_value  |           |   0    |   0   |    0    |
+        +-------------+-----------+--------+-------+---------+
+
     """
     
     def process_from_file(self, paths=None) -> DataBundle:
@@ -349,13 +405,24 @@ class WeiboNERPipe(_CNNERPipe):
     .. csv-table::
        :header: "raw_chars", "chars", "target", "seq_len"
 
-       "[相, 比, 之, 下,...]", "[2, 3, 4, 5, ...]", "[0, 0, 0, 0, ...]", 11
-       "[青, 岛, 海, 牛, 队, 和, ...]", "[10, 21, ....]", "[1, 2, 3, ...]", 21
+       "['老', '百', '姓']", "[38, 39, 40]", "[4, 3, 3]", 3
+       "['心']", "[41]", "[0]", 1
        "[...]", "[...]", "[...]", .
 
     raw_chars列为List[str], 是未转换的原始数据; chars列为List[int]，是转换为index的输入数据; target列是List[int]，是转换为index的
     target。返回的DataSet中被设置为input有chars, target, seq_len; 设置为target有target。
 
+    dataset的print_field_meta()函数输出的各个field的被设置成input和target的情况为::
+
+        +-------------+-----------+--------+-------+---------+
+        | field_names | raw_chars | target | chars | seq_len |
+        +-------------+-----------+--------+-------+---------+
+        |   is_input  |   False   |  True  |  True |   True  |
+        |  is_target  |   False   |  True  | False |   True  |
+        | ignore_type |           | False  | False |  False  |
+        |  pad_value  |           |   0    |   0   |    0    |
+        +-------------+-----------+--------+-------+---------+
+
     """
     
     def process_from_file(self, paths=None) -> DataBundle:
diff --git a/fastNLP/io/pipe/coreference.py b/fastNLP/io/pipe/coreference.py
index c1b218a5..0cf6c996 100644
--- a/fastNLP/io/pipe/coreference.py
+++ b/fastNLP/io/pipe/coreference.py
@@ -18,9 +18,29 @@ from ...core.const import Const
 class CoReferencePipe(Pipe):
     """
     对Coreference resolution问题进行处理，得到文章种类/说话者/字符级信息/序列长度。
+
+    处理完成后数据包含文章类别、speaker信息、句子信息、句子对应的index、char、句子长度、target：
+
+        .. csv-table::
+           :header: "words1", "words2","words3","words4","chars","seq_len","target"
+
+           "bc", "[[0,0],[1,1]]","[['I','am'],[]]","[[1,2],[]]","[[[1],[2,3]],[]]","[2,3]","[[[2,3],[6,7]],[[10,12],[20,22]]]"
+           "[...]", "[...]","[...]","[...]","[...]","[...]","[...]"
+
+    dataset的print_field_meta()函数输出的各个field的被设置成input和target的情况为(注意: 下表的field名与上面csv-table列出的field不一致, 待与实际输出核对)::
+
+        +-------------+-----------+--------+-------+---------+
+        | field_names | raw_chars | target | chars | seq_len |
+        +-------------+-----------+--------+-------+---------+
+        |   is_input  |   False   |  True  |  True |   True  |
+        |  is_target  |   False   |  True  | False |   True  |
+        | ignore_type |           | False  | False |  False  |
+        |  pad_value  |           |   0    |   0   |    0    |
+        +-------------+-----------+--------+-------+---------+
+
     """
 
-    def __init__(self,config):
+    def __init__(self, config):
         super().__init__()
         self.config = config
 
@@ -35,14 +55,6 @@ class CoReferencePipe(Pipe):
            "bc/cctv/00/cctv_0000_1", "[['Speaker#1', 'peaker#1'],[]]","[['He','is'],[]]","[[[2,3],[6,7]],[[10,12],[20,22]]]"
            "[...]", "[...]","[...]","[...]"
 
-        处理完成后数据包含文章类别、speaker信息、句子信息、句子对应的index、char、句子长度、target：
-        
-        .. csv-table::
-           :header: "words1", "words2","words3","words4","chars","seq_len","target"
-
-           "bc", "[[0,0],[1,1]]","[['I','am'],[]]","[[1,2],[]]","[[[1],[2,3]],[]]","[2,3]","[[[2,3],[6,7]],[[10,12],[20,22]]]"
-           "[...]", "[...]","[...]","[...]","[...]","[...]","[...]"
-
 
         :param data_bundle:
         :return:
diff --git a/fastNLP/io/pipe/cws.py b/fastNLP/io/pipe/cws.py
index 97bda896..a2f2e7a2 100644
--- a/fastNLP/io/pipe/cws.py
+++ b/fastNLP/io/pipe/cws.py
@@ -138,13 +138,22 @@ class CWSPipe(Pipe):
     对CWS数据进行预处理, 处理之后的数据，具备以下的结构
 
     .. csv-table::
-       :header: "raw_words", "chars", "target", "bigrams", "trigrams", "seq_len"
+       :header: "raw_words", "chars", "target", "seq_len"
 
-       "共同  创造  美好...", "[2, 3, 4...]", "[0, 2, 0, 2,...]", "[10, 4, 1,...]","[6, 4, 1,...]", 13
-       "2001年  新年  钟声...", "[8, 9, 9, 7, ...]", "[0, 1, 1, 1, 2...]", "[11, 12, ...]","[3, 9, ...]", 20
-       "...", "[...]","[...]", "[...]","[...]", .
+       "共同  创造  美好...", "[2, 3, 4...]", "[0, 2, 0, 2,...]", 13
+       "2001年  新年  钟声...", "[8, 9, 9, 7, ...]", "[0, 1, 1, 1, 2...]", 20
+       "...", "[...]","[...]", .
 
-    其中bigrams仅当bigrams列为True的时候存在
+    dataset的print_field_meta()函数输出的各个field的被设置成input和target的情况为::
+
+        +-------------+-----------+-------+--------+---------+
+        | field_names | raw_words | chars | target | seq_len |
+        +-------------+-----------+-------+--------+---------+
+        |   is_input  |   False   |  True |  True  |   True  |
+        |  is_target  |   False   | False |  True  |   True  |
+        | ignore_type |           | False | False  |  False  |
+        |  pad_value  |           |   0   |   0    |    0    |
+        +-------------+-----------+-------+--------+---------+
 
     """
     
diff --git a/fastNLP/io/pipe/matching.py b/fastNLP/io/pipe/matching.py
index def750c0..7747dec3 100644
--- a/fastNLP/io/pipe/matching.py
+++ b/fastNLP/io/pipe/matching.py
@@ -37,16 +37,27 @@ class MatchingBertPipe(Pipe):
     Matching任务的Bert pipe，输出的DataSet将包含以下的field
 
     .. csv-table::
-       :header: "raw_words1", "raw_words2", "words", "target", "seq_len"
+       :header: "raw_words1", "raw_words2", "target", "words", "seq_len"
 
-       "The new rights are...", "Everyone really likes..",  "[2, 3, 4, 5, ...]", 1, 10
-       "This site includes a...", "The Government Executive...", "[11, 12, 13,...]", 0, 5
-       "...", "...", "[...]", ., .
+       "The new rights are...", "Everyone really likes..", 1,  "[2, 3, 4, 5, ...]", 10
+       "This site includes a...", "The Government Executive...", 0, "[11, 12, 13,...]", 5
+       "...", "...", ., "[...]", .
 
     words列是将raw_words1(即premise), raw_words2(即hypothesis)使用"[SEP]"链接起来转换为index的。
     words列被设置为input，target列被设置为target和input(设置为input以方便在forward函数中计算loss，
     如果不在forward函数中计算loss也不影响，fastNLP将根据forward函数的形参名进行传参).
 
+    dataset的print_field_meta()函数输出的各个field的被设置成input和target的情况为::
+
+        +-------------+------------+------------+--------+-------+---------+
+        | field_names | raw_words1 | raw_words2 | target | words | seq_len |
+        +-------------+------------+------------+--------+-------+---------+
+        |   is_input  |   False    |   False    | False  |  True |   True  |
+        |  is_target  |   False    |   False    |  True  | False |  False  |
+        | ignore_type |            |            | False  | False |  False  |
+        |  pad_value  |            |            |   0    |   0   |    0    |
+        +-------------+------------+------------+--------+-------+---------+
+
     """
     
     def __init__(self, lower=False, tokenizer: str = 'raw'):
@@ -75,6 +86,18 @@ class MatchingBertPipe(Pipe):
         return data_bundle
     
     def process(self, data_bundle):
+        """
+        输入的data_bundle中的dataset需要具有以下结构：
+
+        .. csv-table::
+            :header: "raw_words1", "raw_words2", "target"
+
+            "Dana Reeve, the widow of the actor...", "Christopher Reeve had an...", "not_entailment"
+            "...", "...", "..."
+
+        :param data_bundle:
+        :return:
+        """
         for dataset in data_bundle.datasets.values():
             if dataset.has_field(Const.TARGET):
                 dataset.drop(lambda x: x[Const.TARGET] == '-')
@@ -178,15 +201,27 @@ class MatchingPipe(Pipe):
     Matching任务的Pipe。输出的DataSet将包含以下的field
 
     .. csv-table::
-       :header: "raw_words1", "raw_words2", "words1", "words2", "target", "seq_len1", "seq_len2"
+       :header: "raw_words1", "raw_words2", "target", "words1", "words2", "seq_len1", "seq_len2"
 
-       "The new rights are...", "Everyone really likes..",  "[2, 3, 4, 5, ...]", "[10, 20, 6]", 1, 10, 13
-       "This site includes a...", "The Government Executive...", "[11, 12, 13,...]", "[2, 7, ...]", 0, 6, 7
-       "...", "...", "[...]", "[...]", ., ., .
+       "The new rights are...", "Everyone really likes..", 1,  "[2, 3, 4, 5, ...]", "[10, 20, 6]", 10, 13
+       "This site includes a...", "The Government Executive...", 0, "[11, 12, 13,...]", "[2, 7, ...]", 6, 7
+       "...", "...", ., "[...]", "[...]", ., .
 
     words1是premise，words2是hypothesis。其中words1,words2,seq_len1,seq_len2被设置为input；target被设置为target
     和input(设置为input以方便在forward函数中计算loss，如果不在forward函数中计算loss也不影响，fastNLP将根据forward函数
     的形参名进行传参)。
+
+    dataset的print_field_meta()函数输出的各个field的被设置成input和target的情况为::
+
+        +-------------+------------+------------+--------+--------+--------+----------+----------+
+        | field_names | raw_words1 | raw_words2 | target | words1 | words2 | seq_len1 | seq_len2 |
+        +-------------+------------+------------+--------+--------+--------+----------+----------+
+        |   is_input  |   False    |   False    | False  |  True  |  True  |   True   |   True   |
+        |  is_target  |   False    |   False    |  True  | False  | False  |  False   |  False   |
+        | ignore_type |            |            | False  | False  | False  |  False   |  False   |
+        |  pad_value  |            |            |   0    |   0    |   0    |    0     |    0     |
+        +-------------+------------+------------+--------+--------+--------+----------+----------+
+
     """
     
     def __init__(self, lower=False, tokenizer: str = 'raw'):