From 92fa48e1ce01f1d01500f74dc647c72d358536e0 Mon Sep 17 00:00:00 2001
From: benbijituo <benbenjituo@gmail.com>
Date: Fri, 27 Sep 2019 16:37:34 +0800
Subject: [PATCH 1/3] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E4=BA=86CNXNLI=E7=9A=84?=
 =?UTF-8?q?=5Fload()=EF=BC=8C=E5=8F=AF=E4=BB=A5=E5=A4=84=E7=90=86=E7=89=B9?=
 =?UTF-8?q?=E6=AE=8A=E7=9A=84instance=E6=A0=BC=E5=BC=8F=E5=A6=82=E4=B8=8B?=
 =?UTF-8?q?=EF=BC=9A=20=E2=80=9CXXX\t"XXX\tXXX?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 fastNLP/io/pipe/matching.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/fastNLP/io/pipe/matching.py b/fastNLP/io/pipe/matching.py
index dbe69525..016730f2 100644
--- a/fastNLP/io/pipe/matching.py
+++ b/fastNLP/io/pipe/matching.py
@@ -351,6 +351,10 @@ class MNLIPipe(MatchingPipe):
 
 
 class LCQMCPipe(MatchingPipe):
+    def __init__(self):
+        super().__init__()
+        self.tokenizer = 'cn-char'
+
     def process_from_file(self, paths=None):
         data_bundle = LCQMCLoader().load(paths)
         data_bundle = RenamePipe().process(data_bundle)
@@ -360,6 +364,10 @@ class LCQMCPipe(MatchingPipe):
 
 
 class CNXNLIPipe(MatchingPipe):
+    def __init__(self):
+        super().__init__()
+        self.tokenizer = 'cn-char'
+
     def process_from_file(self, paths=None):
         data_bundle = CNXNLILoader().load(paths)
         data_bundle = GranularizePipe(task='XNLI').process(data_bundle)
@@ -370,6 +378,10 @@ class CNXNLIPipe(MatchingPipe):
 
 
 class BQCorpusPipe(MatchingPipe):
+    def __init__(self):
+        super().__init__()
+        self.tokenizer = 'cn-char'
+
     def process_from_file(self, paths=None):
         data_bundle = BQCorpusLoader().load(paths)
         data_bundle = RenamePipe().process(data_bundle)
@@ -462,6 +474,10 @@ class MachingTruncatePipe(Pipe):  # truncate sentence for bert, modify seq_len
 
 
 class LCQMCBertPipe(MatchingBertPipe):
+    def __init__(self):
+        super().__init__()
+        self.tokenizer = 'cn-char'
+
     def process_from_file(self, paths=None):
         data_bundle = LCQMCLoader().load(paths)
         data_bundle = RenamePipe(task='cn-nli-bert').process(data_bundle)
@@ -472,6 +488,10 @@ class LCQMCBertPipe(MatchingBertPipe):
 
 
 class BQCorpusBertPipe(MatchingBertPipe):
+    def __init__(self):
+        super().__init__()
+        self.tokenizer = 'cn-char'
+
     def process_from_file(self, paths=None):
         data_bundle = BQCorpusLoader().load(paths)
         data_bundle = RenamePipe(task='cn-nli-bert').process(data_bundle)
@@ -482,6 +502,10 @@ class BQCorpusBertPipe(MatchingBertPipe):
 
 
 class CNXNLIBertPipe(MatchingBertPipe):
+    def __init__(self):
+        super().__init__()
+        self.tokenizer = 'cn-char'
+
     def process_from_file(self, paths=None):
         data_bundle = CNXNLILoader().load(paths)
         data_bundle = GranularizePipe(task='XNLI').process(data_bundle)

From 6b147711af95ad02938cd7ccdda95884fe9de4e1 Mon Sep 17 00:00:00 2001
From: benbijituo <benbenjituo@gmail.com>
Date: Fri, 27 Sep 2019 16:49:26 +0800
Subject: [PATCH 2/3] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E4=BA=86CNXNLI=E7=9A=84?=
 =?UTF-8?q?=5Fload()=EF=BC=8C=E5=8F=AF=E4=BB=A5=E5=A4=84=E7=90=86=E7=89=B9?=
 =?UTF-8?q?=E6=AE=8A=E7=9A=84instance=E6=A0=BC=E5=BC=8F=E5=A6=82=E4=B8=8B?=
 =?UTF-8?q?=EF=BC=9A=20=E2=80=9CXXX\t"XXX\tXXX?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 fastNLP/io/pipe/matching.py | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/fastNLP/io/pipe/matching.py b/fastNLP/io/pipe/matching.py
index 016730f2..6af8f5a6 100644
--- a/fastNLP/io/pipe/matching.py
+++ b/fastNLP/io/pipe/matching.py
@@ -352,8 +352,7 @@ class MNLIPipe(MatchingPipe):
 
 class LCQMCPipe(MatchingPipe):
     def __init__(self):
-        super().__init__()
-        self.tokenizer = 'cn-char'
+        super().__init__(tokenizer='cn-char')
 
     def process_from_file(self, paths=None):
         data_bundle = LCQMCLoader().load(paths)
@@ -365,8 +364,7 @@ class LCQMCPipe(MatchingPipe):
 
 class CNXNLIPipe(MatchingPipe):
     def __init__(self):
-        super().__init__()
-        self.tokenizer = 'cn-char'
+        super().__init__(tokenizer='cn-char')
 
     def process_from_file(self, paths=None):
         data_bundle = CNXNLILoader().load(paths)
@@ -379,8 +377,7 @@ class CNXNLIPipe(MatchingPipe):
 
 class BQCorpusPipe(MatchingPipe):
     def __init__(self):
-        super().__init__()
-        self.tokenizer = 'cn-char'
+        super().__init__(tokenizer='cn-char')
 
     def process_from_file(self, paths=None):
         data_bundle = BQCorpusLoader().load(paths)
@@ -475,8 +472,7 @@ class MachingTruncatePipe(Pipe):  # truncate sentence for bert, modify seq_len
 
 class LCQMCBertPipe(MatchingBertPipe):
     def __init__(self):
-        super().__init__()
-        self.tokenizer = 'cn-char'
+        super().__init__(tokenizer='cn-char')
 
     def process_from_file(self, paths=None):
         data_bundle = LCQMCLoader().load(paths)
@@ -489,8 +485,7 @@ class LCQMCBertPipe(MatchingBertPipe):
 
 class BQCorpusBertPipe(MatchingBertPipe):
     def __init__(self):
-        super().__init__()
-        self.tokenizer = 'cn-char'
+        super().__init__(tokenizer='cn-char')
 
     def process_from_file(self, paths=None):
         data_bundle = BQCorpusLoader().load(paths)
@@ -503,8 +498,7 @@ class BQCorpusBertPipe(MatchingBertPipe):
 
 class CNXNLIBertPipe(MatchingBertPipe):
     def __init__(self):
-        super().__init__()
-        self.tokenizer = 'cn-char'
+        super().__init__(tokenizer='cn-char')
 
     def process_from_file(self, paths=None):
         data_bundle = CNXNLILoader().load(paths)

From 7636ef2990b11ce15082ea71ee233c06357644e3 Mon Sep 17 00:00:00 2001
From: Yige Xu <xuyige1996@gmail.com>
Date: Sat, 28 Sep 2019 18:51:57 +0800
Subject: [PATCH 3/3] fix bugs in Chinese Matching Pipe

---
 fastNLP/io/pipe/matching.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/fastNLP/io/pipe/matching.py b/fastNLP/io/pipe/matching.py
index 6af8f5a6..f58706fe 100644
--- a/fastNLP/io/pipe/matching.py
+++ b/fastNLP/io/pipe/matching.py
@@ -351,8 +351,8 @@ class MNLIPipe(MatchingPipe):
 
 
 class LCQMCPipe(MatchingPipe):
-    def __init__(self):
-        super().__init__(tokenizer='cn-char')
+    def __init__(self, tokenizer='cn-char'):  # hyphenated key, same default as the sibling Chinese pipes
+        super().__init__(tokenizer=tokenizer)
 
     def process_from_file(self, paths=None):
         data_bundle = LCQMCLoader().load(paths)
@@ -363,8 +363,8 @@ class LCQMCPipe(MatchingPipe):
 
 
 class CNXNLIPipe(MatchingPipe):
-    def __init__(self):
-        super().__init__(tokenizer='cn-char')
+    def __init__(self, tokenizer='cn-char'):
+        super().__init__(tokenizer=tokenizer)
 
     def process_from_file(self, paths=None):
         data_bundle = CNXNLILoader().load(paths)
@@ -376,8 +376,8 @@ class CNXNLIPipe(MatchingPipe):
 
 
 class BQCorpusPipe(MatchingPipe):
-    def __init__(self):
-        super().__init__(tokenizer='cn-char')
+    def __init__(self, tokenizer='cn-char'):
+        super().__init__(tokenizer=tokenizer)
 
     def process_from_file(self, paths=None):
         data_bundle = BQCorpusLoader().load(paths)
@@ -471,8 +471,8 @@ class MachingTruncatePipe(Pipe):  # truncate sentence for bert, modify seq_len
 
 
 class LCQMCBertPipe(MatchingBertPipe):
-    def __init__(self):
-        super().__init__(tokenizer='cn-char')
+    def __init__(self, tokenizer='cn-char'):  # hyphenated key, same default as the sibling Chinese Bert pipes
+        super().__init__(tokenizer=tokenizer)
 
     def process_from_file(self, paths=None):
         data_bundle = LCQMCLoader().load(paths)
@@ -484,8 +484,8 @@ class LCQMCBertPipe(MatchingBertPipe):
 
 
 class BQCorpusBertPipe(MatchingBertPipe):
-    def __init__(self):
-        super().__init__(tokenizer='cn-char')
+    def __init__(self, tokenizer='cn-char'):
+        super().__init__(tokenizer=tokenizer)
 
     def process_from_file(self, paths=None):
         data_bundle = BQCorpusLoader().load(paths)
@@ -497,8 +497,8 @@ class BQCorpusBertPipe(MatchingBertPipe):
 
 
 class CNXNLIBertPipe(MatchingBertPipe):
-    def __init__(self):
-        super().__init__(tokenizer='cn-char')
+    def __init__(self, tokenizer='cn-char'):
+        super().__init__(tokenizer=tokenizer)
 
     def process_from_file(self, paths=None):
         data_bundle = CNXNLILoader().load(paths)