Browse Source

修改了CNXNLI的_load(),可以处理特殊的instance格式如下:

“XXX\t"XXX\tXXX
tags/v0.5.0
benbijituo 5 years ago
parent
commit
6b147711af
1 changed file with 6 additions and 12 deletions
  1. +6
    -12
      fastNLP/io/pipe/matching.py

+ 6
- 12
fastNLP/io/pipe/matching.py View File

@@ -352,8 +352,7 @@ class MNLIPipe(MatchingPipe):


class LCQMCPipe(MatchingPipe): class LCQMCPipe(MatchingPipe):
def __init__(self): def __init__(self):
super().__init__()
self.tokenizer = 'cn-char'
super().__init__(tokenizer='cn-char')


def process_from_file(self, paths=None): def process_from_file(self, paths=None):
data_bundle = LCQMCLoader().load(paths) data_bundle = LCQMCLoader().load(paths)
@@ -365,8 +364,7 @@ class LCQMCPipe(MatchingPipe):


class CNXNLIPipe(MatchingPipe): class CNXNLIPipe(MatchingPipe):
def __init__(self): def __init__(self):
super().__init__()
self.tokenizer = 'cn-char'
super().__init__(tokenizer='cn-char')


def process_from_file(self, paths=None): def process_from_file(self, paths=None):
data_bundle = CNXNLILoader().load(paths) data_bundle = CNXNLILoader().load(paths)
@@ -379,8 +377,7 @@ class CNXNLIPipe(MatchingPipe):


class BQCorpusPipe(MatchingPipe): class BQCorpusPipe(MatchingPipe):
def __init__(self): def __init__(self):
super().__init__()
self.tokenizer = 'cn-char'
super().__init__(tokenizer='cn-char')


def process_from_file(self, paths=None): def process_from_file(self, paths=None):
data_bundle = BQCorpusLoader().load(paths) data_bundle = BQCorpusLoader().load(paths)
@@ -475,8 +472,7 @@ class MachingTruncatePipe(Pipe): # truncate sentence for bert, modify seq_len


class LCQMCBertPipe(MatchingBertPipe): class LCQMCBertPipe(MatchingBertPipe):
def __init__(self): def __init__(self):
super().__init__()
self.tokenizer = 'cn-char'
super().__init__(tokenizer='cn-char')


def process_from_file(self, paths=None): def process_from_file(self, paths=None):
data_bundle = LCQMCLoader().load(paths) data_bundle = LCQMCLoader().load(paths)
@@ -489,8 +485,7 @@ class LCQMCBertPipe(MatchingBertPipe):


class BQCorpusBertPipe(MatchingBertPipe): class BQCorpusBertPipe(MatchingBertPipe):
def __init__(self): def __init__(self):
super().__init__()
self.tokenizer = 'cn-char'
super().__init__(tokenizer='cn-char')


def process_from_file(self, paths=None): def process_from_file(self, paths=None):
data_bundle = BQCorpusLoader().load(paths) data_bundle = BQCorpusLoader().load(paths)
@@ -503,8 +498,7 @@ class BQCorpusBertPipe(MatchingBertPipe):


class CNXNLIBertPipe(MatchingBertPipe): class CNXNLIBertPipe(MatchingBertPipe):
def __init__(self): def __init__(self):
super().__init__()
self.tokenizer = 'cn-char'
super().__init__(tokenizer='cn-char')


def process_from_file(self, paths=None): def process_from_file(self, paths=None):
data_bundle = CNXNLILoader().load(paths) data_bundle = CNXNLILoader().load(paths)


Loading…
Cancel
Save