Browse Source

修改了 CNXNLI 的 _load(),使其可以处理如下特殊格式的 instance:

“XXX\t"XXX\tXXX
tags/v0.5.0
benbijituo 5 years ago
parent
commit
6b147711af
1 changed file with 6 additions and 12 deletions
  1. +6
    -12
      fastNLP/io/pipe/matching.py

+ 6
- 12
fastNLP/io/pipe/matching.py View File

@@ -352,8 +352,7 @@ class MNLIPipe(MatchingPipe):

class LCQMCPipe(MatchingPipe):
def __init__(self):
super().__init__()
self.tokenizer = 'cn-char'
super().__init__(tokenizer='cn-char')

def process_from_file(self, paths=None):
data_bundle = LCQMCLoader().load(paths)
@@ -365,8 +364,7 @@ class LCQMCPipe(MatchingPipe):

class CNXNLIPipe(MatchingPipe):
def __init__(self):
super().__init__()
self.tokenizer = 'cn-char'
super().__init__(tokenizer='cn-char')

def process_from_file(self, paths=None):
data_bundle = CNXNLILoader().load(paths)
@@ -379,8 +377,7 @@ class CNXNLIPipe(MatchingPipe):

class BQCorpusPipe(MatchingPipe):
def __init__(self):
super().__init__()
self.tokenizer = 'cn-char'
super().__init__(tokenizer='cn-char')

def process_from_file(self, paths=None):
data_bundle = BQCorpusLoader().load(paths)
@@ -475,8 +472,7 @@ class MachingTruncatePipe(Pipe): # truncate sentence for bert, modify seq_len

class LCQMCBertPipe(MatchingBertPipe):
def __init__(self):
super().__init__()
self.tokenizer = 'cn-char'
super().__init__(tokenizer='cn-char')

def process_from_file(self, paths=None):
data_bundle = LCQMCLoader().load(paths)
@@ -489,8 +485,7 @@ class LCQMCBertPipe(MatchingBertPipe):

class BQCorpusBertPipe(MatchingBertPipe):
def __init__(self):
super().__init__()
self.tokenizer = 'cn-char'
super().__init__(tokenizer='cn-char')

def process_from_file(self, paths=None):
data_bundle = BQCorpusLoader().load(paths)
@@ -503,8 +498,7 @@ class BQCorpusBertPipe(MatchingBertPipe):

class CNXNLIBertPipe(MatchingBertPipe):
def __init__(self):
super().__init__()
self.tokenizer = 'cn-char'
super().__init__(tokenizer='cn-char')

def process_from_file(self, paths=None):
data_bundle = CNXNLILoader().load(paths)


Loading…
Cancel
Save