@@ -2,6 +2,6 @@ fastNLP.core.callback | |||||
===================== | ===================== | ||||
.. automodule:: fastNLP.core.callback | .. automodule:: fastNLP.core.callback | ||||
:members: Callback, GradientClipCallback, EarlyStopCallback, FitlogCallback, EvaluateCallback, LRScheduler, ControlC, LRFinder, TensorboardCallback, WarmupCallback, SaveModelCallback, EchoCallback, CallbackException, EarlyStopError | |||||
:members: Callback, GradientClipCallback, EarlyStopCallback, FitlogCallback, EvaluateCallback, LRScheduler, ControlC, LRFinder, TensorboardCallback, WarmupCallback, SaveModelCallback, CallbackException, EarlyStopError | |||||
:inherited-members: | :inherited-members: | ||||
@@ -2,6 +2,6 @@ fastNLP.io.loader | |||||
================= | ================= | ||||
.. automodule:: fastNLP.io.loader | .. automodule:: fastNLP.io.loader | ||||
:members: Loader, YelpLoader, YelpFullLoader, YelpPolarityLoader, IMDBLoader, SSTLoader, SST2Loader, ChnSentiCorpLoader, ConllLoader, Conll2003Loader, Conll2003NERLoader, OntoNotesNERLoader, CTBLoader, MsraNERLoader, PeopleDailyNERLoader, WeiboNERLoader, CSVLoader, JsonLoader, CWSLoader, MNLILoader, QuoraLoader, SNLILoader, QNLILoader, RTELoader, CoReferenceLoader | |||||
:members: Loader, YelpLoader, YelpFullLoader, YelpPolarityLoader, IMDBLoader, SSTLoader, SST2Loader, ChnSentiCorpLoader, THUCNewsLoader, WeiboSenti100kLoader, ConllLoader, Conll2003Loader, Conll2003NERLoader, OntoNotesNERLoader, CTBLoader, MsraNERLoader, PeopleDailyNERLoader, WeiboNERLoader, CSVLoader, JsonLoader, CWSLoader, MNLILoader, QuoraLoader, SNLILoader, QNLILoader, RTELoader, CNXNLILoader, BQCorpusLoader, LCQMCLoader, CoReferenceLoader | |||||
:inherited-members: | :inherited-members: | ||||
@@ -2,6 +2,6 @@ fastNLP.io.pipe | |||||
=============== | =============== | ||||
.. automodule:: fastNLP.io.pipe | .. automodule:: fastNLP.io.pipe | ||||
:members: Pipe, CWSPipe, YelpFullPipe, YelpPolarityPipe, SSTPipe, SST2Pipe, IMDBPipe, ChnSentiCorpPipe, Conll2003NERPipe, OntoNotesNERPipe, MsraNERPipe, WeiboNERPipe, PeopleDailyPipe, Conll2003Pipe, MatchingBertPipe, RTEBertPipe, SNLIBertPipe, QuoraBertPipe, QNLIBertPipe, MNLIBertPipe, MatchingPipe, RTEPipe, SNLIPipe, QuoraPipe, QNLIPipe, MNLIPipe, CoReferencePipe | |||||
:members: Pipe, CWSPipe, YelpFullPipe, YelpPolarityPipe, SSTPipe, SST2Pipe, IMDBPipe, ChnSentiCorpPipe, THUCNewsPipe, WeiboSenti100kPipe, Conll2003NERPipe, OntoNotesNERPipe, MsraNERPipe, WeiboNERPipe, PeopleDailyPipe, Conll2003Pipe, MatchingBertPipe, RTEBertPipe, SNLIBertPipe, QuoraBertPipe, QNLIBertPipe, MNLIBertPipe, CNXNLIBertPipe, BQCorpusBertPipe, LCQMCBertPipe, MatchingPipe, RTEPipe, SNLIPipe, QuoraPipe, QNLIPipe, MNLIPipe, LCQMCPipe, CNXNLIPipe, BQCorpusPipe, RenamePipe, GranularizePipe, MachingTruncatePipe, CoReferencePipe | |||||
:inherited-members: | :inherited-members: | ||||
@@ -2,7 +2,7 @@ fastNLP.io | |||||
========== | ========== | ||||
.. automodule:: fastNLP.io | .. automodule:: fastNLP.io | ||||
:members: DataBundle, EmbedLoader, Loader, YelpLoader, YelpFullLoader, YelpPolarityLoader, IMDBLoader, SSTLoader, SST2Loader, ChnSentiCorpLoader, ConllLoader, Conll2003Loader, Conll2003NERLoader, OntoNotesNERLoader, CTBLoader, MsraNERLoader, WeiboNERLoader, PeopleDailyNERLoader, CSVLoader, JsonLoader, CWSLoader, MNLILoader, QuoraLoader, SNLILoader, QNLILoader, RTELoader, Pipe, YelpFullPipe, YelpPolarityPipe, SSTPipe, SST2Pipe, IMDBPipe, ChnSentiCorpPipe, Conll2003Pipe, Conll2003NERPipe, OntoNotesNERPipe, MsraNERPipe, PeopleDailyPipe, WeiboNERPipe, CWSPipe, MatchingBertPipe, RTEBertPipe, SNLIBertPipe, QuoraBertPipe, QNLIBertPipe, MNLIBertPipe, MatchingPipe, RTEPipe, SNLIPipe, QuoraPipe, QNLIPipe, MNLIPipe, ModelLoader, ModelSaver | |||||
:members: DataBundle, EmbedLoader, Loader, YelpLoader, YelpFullLoader, YelpPolarityLoader, IMDBLoader, SSTLoader, SST2Loader, ChnSentiCorpLoader, THUCNewsLoader, WeiboSenti100kLoader, ConllLoader, Conll2003Loader, Conll2003NERLoader, OntoNotesNERLoader, CTBLoader, MsraNERLoader, WeiboNERLoader, PeopleDailyNERLoader, CSVLoader, JsonLoader, CWSLoader, MNLILoader, QuoraLoader, SNLILoader, QNLILoader, RTELoader, CNXNLILoader, BQCorpusLoader, LCQMCLoader, Pipe, YelpFullPipe, YelpPolarityPipe, SSTPipe, SST2Pipe, IMDBPipe, ChnSentiCorpPipe, THUCNewsPipe, WeiboSenti100kPipe, Conll2003Pipe, Conll2003NERPipe, OntoNotesNERPipe, MsraNERPipe, PeopleDailyPipe, WeiboNERPipe, CWSPipe, MatchingBertPipe, RTEBertPipe, SNLIBertPipe, QuoraBertPipe, QNLIBertPipe, MNLIBertPipe, MatchingPipe, RTEPipe, SNLIPipe, QuoraPipe, QNLIPipe, MNLIPipe, ModelLoader, ModelSaver | |||||
:inherited-members: | :inherited-members: | ||||
子模块 | 子模块 | ||||
@@ -2,7 +2,7 @@ fastNLP | |||||
======= | ======= | ||||
.. automodule:: fastNLP | .. automodule:: fastNLP | ||||
:members: Instance, FieldArray, DataSetIter, BatchIter, TorchLoaderIter, Vocabulary, DataSet, Const, Trainer, Tester, Callback, GradientClipCallback, EarlyStopCallback, FitlogCallback, EvaluateCallback, LRScheduler, ControlC, LRFinder, TensorboardCallback, WarmupCallback, SaveModelCallback, EchoCallback, CallbackException, EarlyStopError, Padder, AutoPadder, EngChar2DPadder, AccuracyMetric, SpanFPreRecMetric, ExtractiveQAMetric, Optimizer, SGD, Adam, AdamW, Sampler, SequentialSampler, BucketSampler, RandomSampler, LossFunc, CrossEntropyLoss, L1Loss, BCELoss, NLLLoss, LossInForward, cache_results, logger | |||||
:members: Instance, FieldArray, DataSetIter, BatchIter, TorchLoaderIter, Vocabulary, DataSet, Const, Trainer, Tester, Callback, GradientClipCallback, EarlyStopCallback, FitlogCallback, EvaluateCallback, LRScheduler, ControlC, LRFinder, TensorboardCallback, WarmupCallback, SaveModelCallback, CallbackException, EarlyStopError, Padder, AutoPadder, EngChar2DPadder, AccuracyMetric, SpanFPreRecMetric, ExtractiveQAMetric, Optimizer, SGD, Adam, AdamW, Sampler, SequentialSampler, BucketSampler, RandomSampler, LossFunc, CrossEntropyLoss, L1Loss, BCELoss, NLLLoss, LossInForward, cache_results, logger | |||||
:inherited-members: | :inherited-members: | ||||
子模块 | 子模块 | ||||
@@ -47,7 +47,7 @@ __all__ = [ | |||||
"SNLILoader", | "SNLILoader", | ||||
"QNLILoader", | "QNLILoader", | ||||
"RTELoader", | "RTELoader", | ||||
"XNLILoader", | |||||
"CNXNLILoader", | |||||
"BQCorpusLoader", | "BQCorpusLoader", | ||||
"LCQMCLoader", | "LCQMCLoader", | ||||
@@ -70,32 +70,61 @@ __all__ = [ | |||||
"WeiboNERPipe", | "WeiboNERPipe", | ||||
"CWSPipe", | "CWSPipe", | ||||
"Pipe", | |||||
"CWSPipe", | |||||
"YelpFullPipe", | |||||
"YelpPolarityPipe", | |||||
"SSTPipe", | |||||
"SST2Pipe", | |||||
"IMDBPipe", | |||||
"ChnSentiCorpPipe", | |||||
"THUCNewsPipe", | |||||
"WeiboSenti100kPipe", | |||||
"Conll2003NERPipe", | |||||
"OntoNotesNERPipe", | |||||
"MsraNERPipe", | |||||
"WeiboNERPipe", | |||||
"PeopleDailyPipe", | |||||
"Conll2003Pipe", | |||||
"MatchingBertPipe", | "MatchingBertPipe", | ||||
"RTEBertPipe", | "RTEBertPipe", | ||||
"SNLIBertPipe", | "SNLIBertPipe", | ||||
"QuoraBertPipe", | "QuoraBertPipe", | ||||
"QNLIBertPipe", | "QNLIBertPipe", | ||||
"MNLIBertPipe", | "MNLIBertPipe", | ||||
"CNXNLIBertPipe", | |||||
"BQCorpusBertPipe", | |||||
"LCQMCBertPipe", | |||||
"MatchingPipe", | "MatchingPipe", | ||||
"RTEPipe", | "RTEPipe", | ||||
"SNLIPipe", | "SNLIPipe", | ||||
"QuoraPipe", | "QuoraPipe", | ||||
"QNLIPipe", | "QNLIPipe", | ||||
"MNLIPipe", | "MNLIPipe", | ||||
"LCQMCPipe", | |||||
"CNXNLIPipe", | |||||
"BQCorpusPipe", | |||||
"RenamePipe", | |||||
"GranularizePipe", | |||||
"MachingTruncatePipe", | |||||
'ModelLoader', | 'ModelLoader', | ||||
'ModelSaver', | 'ModelSaver', | ||||
] | ] | ||||
from .embed_loader import EmbedLoader | |||||
from .data_bundle import DataBundle | |||||
from .model_io import ModelLoader, ModelSaver | |||||
import sys | |||||
from .data_bundle import DataBundle | |||||
from .embed_loader import EmbedLoader | |||||
from .loader import * | from .loader import * | ||||
from .model_io import ModelLoader, ModelSaver | |||||
from .pipe import * | from .pipe import * | ||||
import sys | |||||
from ..doc_utils import doc_process | from ..doc_utils import doc_process | ||||
doc_process(sys.modules[__name__]) | doc_process(sys.modules[__name__]) |
@@ -54,7 +54,9 @@ __all__ = [ | |||||
'SSTLoader', | 'SSTLoader', | ||||
'SST2Loader', | 'SST2Loader', | ||||
"ChnSentiCorpLoader", | "ChnSentiCorpLoader", | ||||
"THUCNewsLoader", | |||||
"WeiboSenti100kLoader", | |||||
'ConllLoader', | 'ConllLoader', | ||||
'Conll2003Loader', | 'Conll2003Loader', | ||||
'Conll2003NERLoader', | 'Conll2003NERLoader', | ||||
@@ -63,26 +65,31 @@ __all__ = [ | |||||
"MsraNERLoader", | "MsraNERLoader", | ||||
"PeopleDailyNERLoader", | "PeopleDailyNERLoader", | ||||
"WeiboNERLoader", | "WeiboNERLoader", | ||||
'CSVLoader', | 'CSVLoader', | ||||
'JsonLoader', | 'JsonLoader', | ||||
'CWSLoader', | 'CWSLoader', | ||||
'MNLILoader', | 'MNLILoader', | ||||
"QuoraLoader", | "QuoraLoader", | ||||
"SNLILoader", | "SNLILoader", | ||||
"QNLILoader", | "QNLILoader", | ||||
"RTELoader", | "RTELoader", | ||||
"CNXNLILoader", | |||||
"BQCorpusLoader", | |||||
"LCQMCLoader", | |||||
"CoReferenceLoader" | "CoReferenceLoader" | ||||
] | ] | ||||
from .classification import YelpLoader, YelpFullLoader, YelpPolarityLoader, IMDBLoader, SSTLoader, SST2Loader, ChnSentiCorpLoader | |||||
from .classification import YelpLoader, YelpFullLoader, YelpPolarityLoader, IMDBLoader, SSTLoader, SST2Loader, \ | |||||
ChnSentiCorpLoader, THUCNewsLoader, WeiboSenti100kLoader | |||||
from .conll import ConllLoader, Conll2003Loader, Conll2003NERLoader, OntoNotesNERLoader, CTBLoader | from .conll import ConllLoader, Conll2003Loader, Conll2003NERLoader, OntoNotesNERLoader, CTBLoader | ||||
from .conll import MsraNERLoader, PeopleDailyNERLoader, WeiboNERLoader | |||||
from .coreference import CoReferenceLoader | |||||
from .csv import CSVLoader | from .csv import CSVLoader | ||||
from .cws import CWSLoader | from .cws import CWSLoader | ||||
from .json import JsonLoader | from .json import JsonLoader | ||||
from .loader import Loader | from .loader import Loader | ||||
from .matching import MNLILoader, QuoraLoader, SNLILoader, QNLILoader, RTELoader | |||||
from .conll import MsraNERLoader, PeopleDailyNERLoader, WeiboNERLoader | |||||
from .coreference import CoReferenceLoader | |||||
from .matching import MNLILoader, QuoraLoader, SNLILoader, QNLILoader, RTELoader, CNXNLILoader, BQCorpusLoader, \ | |||||
LCQMCLoader |
@@ -409,6 +409,7 @@ class THUCNewsLoader(Loader): | |||||
.. csv-table:: | .. csv-table:: | ||||
:header: "raw_words", "target" | :header: "raw_words", "target" | ||||
"马晓旭意外受伤让国奥警惕 无奈大雨格外青睐殷家军记者傅亚雨沈阳报道 ... ", "体育" | "马晓旭意外受伤让国奥警惕 无奈大雨格外青睐殷家军记者傅亚雨沈阳报道 ... ", "体育" | ||||
"...", "..." | "...", "..." | ||||
@@ -446,13 +447,18 @@ class WeiboSenti100kLoader(Loader): | |||||
别名: | 别名: | ||||
数据集简介:微博sentiment classification,二分类 | 数据集简介:微博sentiment classification,二分类 | ||||
原始数据内容为: | 原始数据内容为: | ||||
label text | |||||
0 六一出生的?好讽刺…… //@祭春姬:他爸爸是外星人吧 //@面孔小高:现在的孩子都怎么了 [怒][怒][怒] | |||||
1 听过一场!笑死了昂,一听茄子脱口秀,从此节操是路人![嘻嘻] //@中国梦网官微:@Pencil彭赛 @茄子脱口秀 [圣诞帽][圣诞树][平安果] | |||||
.. .. code-block:: text | |||||
label text | |||||
0 六一出生的?好讽刺…… //@祭春姬:他爸爸是外星人吧 //@面孔小高:现在的孩子都怎么了 [怒][怒][怒] | |||||
1 听过一场!笑死了昂,一听茄子脱口秀,从此节操是路人![嘻嘻] //@中国梦网官微:@Pencil彭赛 @茄子脱口秀 [圣诞帽][圣诞树][平安果] | |||||
读取后的Dataset将具有以下数据结构: | 读取后的Dataset将具有以下数据结构: | ||||
.. csv-table:: | .. csv-table:: | ||||
:header: "raw_chars", "target" | :header: "raw_chars", "target" | ||||
"六一出生的?好讽刺…… //@祭春姬:他爸爸是外星人吧 //@面孔小高:现在的孩子都怎么了 [怒][怒][怒]", "0" | "六一出生的?好讽刺…… //@祭春姬:他爸爸是外星人吧 //@面孔小高:现在的孩子都怎么了 [怒][怒][怒]", "0" | ||||
"...", "..." | "...", "..." | ||||
@@ -15,14 +15,14 @@ import os | |||||
import warnings | import warnings | ||||
from typing import Union, Dict | from typing import Union, Dict | ||||
from .csv import CSVLoader | |||||
from .json import JsonLoader | from .json import JsonLoader | ||||
from .loader import Loader | from .loader import Loader | ||||
from .. import DataBundle | from .. import DataBundle | ||||
from ..utils import check_loader_paths | |||||
from ...core.const import Const | from ...core.const import Const | ||||
from ...core.dataset import DataSet | from ...core.dataset import DataSet | ||||
from ...core.instance import Instance | from ...core.instance import Instance | ||||
from .csv import CSVLoader | |||||
from ..utils import check_loader_paths | |||||
class MNLILoader(Loader): | class MNLILoader(Loader): | ||||
@@ -348,8 +348,9 @@ class CNXNLILoader(Loader): | |||||
.. csv-table:: | .. csv-table:: | ||||
:header: "raw_chars1", "raw_chars2", "target" | :header: "raw_chars1", "raw_chars2", "target" | ||||
"从概念上看,奶油收入有两个基本方面产品和地理.", "产品和地理是什么使奶油抹霜工作.", "1" | "从概念上看,奶油收入有两个基本方面产品和地理.", "产品和地理是什么使奶油抹霜工作.", "1" | ||||
""...", "...", "..." | |||||
"...", "...", "..." | |||||
""" | """ | ||||
@@ -412,6 +413,7 @@ class BQCorpusLoader(Loader): | |||||
.. csv-table:: | .. csv-table:: | ||||
:header: "raw_chars1", "raw_chars2", "target" | :header: "raw_chars1", "raw_chars2", "target" | ||||
"不是邀请的如何贷款?", "我不是你们邀请的客人可以贷款吗?", "1" | "不是邀请的如何贷款?", "我不是你们邀请的客人可以贷款吗?", "1" | ||||
"如何满足微粒银行的审核", "建设银行有微粒贷的资格吗", "0" | "如何满足微粒银行的审核", "建设银行有微粒贷的资格吗", "0" | ||||
"...", "...", "..." | "...", "...", "..." | ||||
@@ -458,9 +460,10 @@ class LCQMCLoader(Loader): | |||||
.. csv-table:: | .. csv-table:: | ||||
:header: "raw_chars1", "raw_chars2", "target" | :header: "raw_chars1", "raw_chars2", "target" | ||||
"喜欢打篮球的男生喜欢什么样的女生?", "爱打篮球的男生喜欢什么样的女生?", "1" | "喜欢打篮球的男生喜欢什么样的女生?", "爱打篮球的男生喜欢什么样的女生?", "1" | ||||
"晚上睡觉带着耳机听音乐有什么害处吗?", "妇可以戴耳机听音乐吗?", "0" | "晚上睡觉带着耳机听音乐有什么害处吗?", "妇可以戴耳机听音乐吗?", "0" | ||||
""...", "...", "..." | |||||
"...", "...", "..." | |||||
""" | """ | ||||
@@ -9,9 +9,9 @@ Pipe用于处理通过 Loader 读取的数据,所有的 Pipe 都包含 ``proce | |||||
""" | """ | ||||
__all__ = [ | __all__ = [ | ||||
"Pipe", | "Pipe", | ||||
"CWSPipe", | "CWSPipe", | ||||
"YelpFullPipe", | "YelpFullPipe", | ||||
"YelpPolarityPipe", | "YelpPolarityPipe", | ||||
"SSTPipe", | "SSTPipe", | ||||
@@ -20,35 +20,46 @@ __all__ = [ | |||||
"ChnSentiCorpPipe", | "ChnSentiCorpPipe", | ||||
"THUCNewsPipe", | "THUCNewsPipe", | ||||
"WeiboSenti100kPipe", | "WeiboSenti100kPipe", | ||||
"Conll2003NERPipe", | "Conll2003NERPipe", | ||||
"OntoNotesNERPipe", | "OntoNotesNERPipe", | ||||
"MsraNERPipe", | "MsraNERPipe", | ||||
"WeiboNERPipe", | "WeiboNERPipe", | ||||
"PeopleDailyPipe", | "PeopleDailyPipe", | ||||
"Conll2003Pipe", | "Conll2003Pipe", | ||||
"MatchingBertPipe", | "MatchingBertPipe", | ||||
"RTEBertPipe", | "RTEBertPipe", | ||||
"SNLIBertPipe", | "SNLIBertPipe", | ||||
"QuoraBertPipe", | "QuoraBertPipe", | ||||
"QNLIBertPipe", | "QNLIBertPipe", | ||||
"MNLIBertPipe", | "MNLIBertPipe", | ||||
"CNXNLIBertPipe", | |||||
"BQCorpusBertPipe", | |||||
"LCQMCBertPipe", | |||||
"MatchingPipe", | "MatchingPipe", | ||||
"RTEPipe", | "RTEPipe", | ||||
"SNLIPipe", | "SNLIPipe", | ||||
"QuoraPipe", | "QuoraPipe", | ||||
"QNLIPipe", | "QNLIPipe", | ||||
"MNLIPipe", | "MNLIPipe", | ||||
"LCQMCPipe", | |||||
"CNXNLIPipe", | |||||
"BQCorpusPipe", | |||||
"RenamePipe", | |||||
"GranularizePipe", | |||||
"MachingTruncatePipe", | |||||
"CoReferencePipe" | "CoReferencePipe" | ||||
] | ] | ||||
from .classification import YelpFullPipe, YelpPolarityPipe, SSTPipe, SST2Pipe, IMDBPipe, ChnSentiCorpPipe, THUCNewsPipe, WeiboSenti100kPipe | |||||
from .classification import YelpFullPipe, YelpPolarityPipe, SSTPipe, SST2Pipe, IMDBPipe, ChnSentiCorpPipe, THUCNewsPipe, \ | |||||
WeiboSenti100kPipe | |||||
from .conll import Conll2003NERPipe, OntoNotesNERPipe, MsraNERPipe, WeiboNERPipe, PeopleDailyPipe | from .conll import Conll2003NERPipe, OntoNotesNERPipe, MsraNERPipe, WeiboNERPipe, PeopleDailyPipe | ||||
from .matching import MatchingBertPipe, RTEBertPipe, SNLIBertPipe, QuoraBertPipe, QNLIBertPipe, MNLIBertPipe, \ | |||||
MatchingPipe, RTEPipe, SNLIPipe, QuoraPipe, QNLIPipe, MNLIPipe | |||||
from .pipe import Pipe | |||||
from .conll import Conll2003Pipe | from .conll import Conll2003Pipe | ||||
from .cws import CWSPipe | |||||
from .coreference import CoReferencePipe | from .coreference import CoReferencePipe | ||||
from .cws import CWSPipe | |||||
from .matching import MatchingBertPipe, RTEBertPipe, SNLIBertPipe, QuoraBertPipe, QNLIBertPipe, MNLIBertPipe, \ | |||||
MatchingPipe, RTEPipe, SNLIPipe, QuoraPipe, QNLIPipe, MNLIPipe, CNXNLIBertPipe, CNXNLIPipe, BQCorpusBertPipe, \ | |||||
LCQMCPipe, BQCorpusPipe, LCQMCBertPipe, RenamePipe, GranularizePipe, MachingTruncatePipe | |||||
from .pipe import Pipe |
@@ -21,11 +21,11 @@ from .utils import get_tokenizer, _indexize, _add_words_field, _drop_empty_insta | |||||
from ..data_bundle import DataBundle | from ..data_bundle import DataBundle | ||||
from ..loader.classification import ChnSentiCorpLoader, THUCNewsLoader, WeiboSenti100kLoader | from ..loader.classification import ChnSentiCorpLoader, THUCNewsLoader, WeiboSenti100kLoader | ||||
from ..loader.classification import IMDBLoader, YelpFullLoader, SSTLoader, SST2Loader, YelpPolarityLoader | from ..loader.classification import IMDBLoader, YelpFullLoader, SSTLoader, SST2Loader, YelpPolarityLoader | ||||
from ...core._logger import logger | |||||
from ...core.const import Const | from ...core.const import Const | ||||
from ...core.dataset import DataSet | from ...core.dataset import DataSet | ||||
from ...core.instance import Instance | from ...core.instance import Instance | ||||
from ...core.vocabulary import Vocabulary | from ...core.vocabulary import Vocabulary | ||||
from ...core._logger import logger | |||||
nonalpnum = re.compile('[^0-9a-zA-Z?!\']+') | nonalpnum = re.compile('[^0-9a-zA-Z?!\']+') | ||||
@@ -718,6 +718,7 @@ class THUCNewsPipe(_CLSPipe): | |||||
.. csv-table:: | .. csv-table:: | ||||
:header: "raw_words", "target" | :header: "raw_words", "target" | ||||
"马晓旭意外受伤让国奥警惕 无奈大雨格外青睐殷家军记者傅亚雨沈阳报道 ... ", "体育" | "马晓旭意外受伤让国奥警惕 无奈大雨格外青睐殷家军记者傅亚雨沈阳报道 ... ", "体育" | ||||
"...", "..." | "...", "..." | ||||
@@ -826,6 +827,7 @@ class WeiboSenti100kPipe(_CLSPipe): | |||||
.. csv-table:: | .. csv-table:: | ||||
:header: "raw_chars", "target" | :header: "raw_chars", "target" | ||||
"六一出生的?好讽刺…… //@祭春姬:他爸爸是外星人吧 //@面孔小高:现在的孩子都怎么了 [怒][怒][怒]", "0" | "六一出生的?好讽刺…… //@祭春姬:他爸爸是外星人吧 //@面孔小高:现在的孩子都怎么了 [怒][怒][怒]", "0" | ||||
"...", "..." | "...", "..." | ||||
@@ -16,20 +16,24 @@ __all__ = [ | |||||
"QuoraPipe", | "QuoraPipe", | ||||
"QNLIPipe", | "QNLIPipe", | ||||
"MNLIPipe", | "MNLIPipe", | ||||
"LCQMCPipe", | |||||
"CNXNLIPipe", | "CNXNLIPipe", | ||||
"BQCorpusPipe", | "BQCorpusPipe", | ||||
"LCQMCPipe", | |||||
"RenamePipe", | |||||
"GranularizePipe", | |||||
"MachingTruncatePipe", | |||||
] | ] | ||||
import warnings | import warnings | ||||
from .pipe import Pipe | from .pipe import Pipe | ||||
from .utils import get_tokenizer | from .utils import get_tokenizer | ||||
from ..loader.matching import SNLILoader, MNLILoader, QNLILoader, RTELoader, QuoraLoader, BQCorpusLoader, CNXNLILoader, LCQMCLoader | |||||
from ..data_bundle import DataBundle | |||||
from ..loader.matching import SNLILoader, MNLILoader, QNLILoader, RTELoader, QuoraLoader, BQCorpusLoader, CNXNLILoader, \ | |||||
LCQMCLoader | |||||
from ...core._logger import logger | |||||
from ...core.const import Const | from ...core.const import Const | ||||
from ...core.vocabulary import Vocabulary | from ...core.vocabulary import Vocabulary | ||||
from ...core._logger import logger | |||||
from ..data_bundle import DataBundle | |||||
class MatchingBertPipe(Pipe): | class MatchingBertPipe(Pipe): | ||||
@@ -145,7 +149,7 @@ class MatchingBertPipe(Pipe): | |||||
f"data set but not in train data set!." | f"data set but not in train data set!." | ||||
warnings.warn(warn_msg) | warnings.warn(warn_msg) | ||||
logger.warning(warn_msg) | logger.warning(warn_msg) | ||||
has_target_datasets = [dataset for name, dataset in data_bundle.datasets.items() if | has_target_datasets = [dataset for name, dataset in data_bundle.datasets.items() if | ||||
dataset.has_field(Const.TARGET)] | dataset.has_field(Const.TARGET)] | ||||
target_vocab.index_dataset(*has_target_datasets, field_name=Const.TARGET) | target_vocab.index_dataset(*has_target_datasets, field_name=Const.TARGET) | ||||
@@ -294,7 +298,7 @@ class MatchingPipe(Pipe): | |||||
f"data set but not in train data set!." | f"data set but not in train data set!." | ||||
warnings.warn(warn_msg) | warnings.warn(warn_msg) | ||||
logger.warning(warn_msg) | logger.warning(warn_msg) | ||||
has_target_datasets = [dataset for name, dataset in data_bundle.datasets.items() if | has_target_datasets = [dataset for name, dataset in data_bundle.datasets.items() if | ||||
dataset.has_field(Const.TARGET)] | dataset.has_field(Const.TARGET)] | ||||
target_vocab.index_dataset(*has_target_datasets, field_name=Const.TARGET) | target_vocab.index_dataset(*has_target_datasets, field_name=Const.TARGET) | ||||
@@ -345,8 +349,9 @@ class MNLIPipe(MatchingPipe): | |||||
data_bundle = MNLILoader().load(paths) | data_bundle = MNLILoader().load(paths) | ||||
return self.process(data_bundle) | return self.process(data_bundle) | ||||
class LCQMCPipe(MatchingPipe): | class LCQMCPipe(MatchingPipe): | ||||
def process_from_file(self, paths = None): | |||||
def process_from_file(self, paths=None): | |||||
data_bundle = LCQMCLoader().load(paths) | data_bundle = LCQMCLoader().load(paths) | ||||
data_bundle = RenamePipe().process(data_bundle) | data_bundle = RenamePipe().process(data_bundle) | ||||
data_bundle = self.process(data_bundle) | data_bundle = self.process(data_bundle) | ||||
@@ -358,14 +363,14 @@ class CNXNLIPipe(MatchingPipe): | |||||
def process_from_file(self, paths=None): | def process_from_file(self, paths=None): | ||||
data_bundle = CNXNLILoader().load(paths) | data_bundle = CNXNLILoader().load(paths) | ||||
data_bundle = GranularizePipe(task='XNLI').process(data_bundle) | data_bundle = GranularizePipe(task='XNLI').process(data_bundle) | ||||
data_bundle = RenamePipe().process(data_bundle) #使中文数据的field | |||||
data_bundle = RenamePipe().process(data_bundle) # 使中文数据的field | |||||
data_bundle = self.process(data_bundle) | data_bundle = self.process(data_bundle) | ||||
data_bundle = RenamePipe().process(data_bundle) | data_bundle = RenamePipe().process(data_bundle) | ||||
return data_bundle | return data_bundle | ||||
class BQCorpusPipe(MatchingPipe): | class BQCorpusPipe(MatchingPipe): | ||||
def process_from_file(self, paths = None): | |||||
def process_from_file(self, paths=None): | |||||
data_bundle = BQCorpusLoader().load(paths) | data_bundle = BQCorpusLoader().load(paths) | ||||
data_bundle = RenamePipe().process(data_bundle) | data_bundle = RenamePipe().process(data_bundle) | ||||
data_bundle = self.process(data_bundle) | data_bundle = self.process(data_bundle) | ||||
@@ -374,12 +379,12 @@ class BQCorpusPipe(MatchingPipe): | |||||
class RenamePipe(Pipe): | class RenamePipe(Pipe): | ||||
def __init__(self, task = 'cn-nli'): | |||||
def __init__(self, task='cn-nli'): | |||||
super().__init__() | super().__init__() | ||||
self.task = task | self.task = task | ||||
def process(self, data_bundle: DataBundle): # rename field name for Chinese Matching dataset | def process(self, data_bundle: DataBundle): # rename field name for Chinese Matching dataset | ||||
if(self.task == 'cn-nli'): | |||||
if (self.task == 'cn-nli'): | |||||
for name, dataset in data_bundle.datasets.items(): | for name, dataset in data_bundle.datasets.items(): | ||||
if (dataset.has_field(Const.RAW_CHARS(0))): | if (dataset.has_field(Const.RAW_CHARS(0))): | ||||
dataset.rename_field(Const.RAW_CHARS(0), Const.RAW_WORDS(0)) # RAW_CHARS->RAW_WORDS | dataset.rename_field(Const.RAW_CHARS(0), Const.RAW_WORDS(0)) # RAW_CHARS->RAW_WORDS | ||||
@@ -392,12 +397,12 @@ class RenamePipe(Pipe): | |||||
else: | else: | ||||
raise RuntimeError( | raise RuntimeError( | ||||
"field name of dataset is not qualified. It should have ether RAW_CHARS or WORDS") | "field name of dataset is not qualified. It should have ether RAW_CHARS or WORDS") | ||||
elif(self.task == 'cn-nli-bert'): | |||||
elif (self.task == 'cn-nli-bert'): | |||||
for name, dataset in data_bundle.datasets.items(): | for name, dataset in data_bundle.datasets.items(): | ||||
if (dataset.has_field(Const.RAW_CHARS(0))): | if (dataset.has_field(Const.RAW_CHARS(0))): | ||||
dataset.rename_field(Const.RAW_CHARS(0), Const.RAW_WORDS(0)) # RAW_CHARS->RAW_WORDS | dataset.rename_field(Const.RAW_CHARS(0), Const.RAW_WORDS(0)) # RAW_CHARS->RAW_WORDS | ||||
dataset.rename_field(Const.RAW_CHARS(1), Const.RAW_WORDS(1)) | dataset.rename_field(Const.RAW_CHARS(1), Const.RAW_WORDS(1)) | ||||
elif(dataset.has_field(Const.RAW_WORDS(0))): | |||||
elif (dataset.has_field(Const.RAW_WORDS(0))): | |||||
dataset.rename_field(Const.RAW_WORDS(0), Const.RAW_CHARS(0)) | dataset.rename_field(Const.RAW_WORDS(0), Const.RAW_CHARS(0)) | ||||
dataset.rename_field(Const.RAW_WORDS(1), Const.RAW_CHARS(1)) | dataset.rename_field(Const.RAW_WORDS(1), Const.RAW_CHARS(1)) | ||||
dataset.rename_field(Const.INPUT, Const.CHAR_INPUT) | dataset.rename_field(Const.INPUT, Const.CHAR_INPUT) | ||||
@@ -409,15 +414,15 @@ class RenamePipe(Pipe): | |||||
raise RuntimeError( | raise RuntimeError( | ||||
"Only support task='cn-nli' or 'cn-nli-bert'" | "Only support task='cn-nli' or 'cn-nli-bert'" | ||||
) | ) | ||||
return data_bundle | return data_bundle | ||||
class GranularizePipe(Pipe): | class GranularizePipe(Pipe): | ||||
def __init__(self, task = None): | |||||
def __init__(self, task=None): | |||||
super().__init__() | super().__init__() | ||||
self.task = task | self.task = task | ||||
def _granularize(self, data_bundle, tag_map): | def _granularize(self, data_bundle, tag_map): | ||||
""" | """ | ||||
该函数对data_bundle中'target'列中的内容进行转换。 | 该函数对data_bundle中'target'列中的内容进行转换。 | ||||
@@ -434,21 +439,22 @@ class GranularizePipe(Pipe): | |||||
dataset.drop(lambda ins: ins[Const.TARGET] == -100) | dataset.drop(lambda ins: ins[Const.TARGET] == -100) | ||||
data_bundle.set_dataset(dataset, name) | data_bundle.set_dataset(dataset, name) | ||||
return data_bundle | return data_bundle | ||||
def process(self, data_bundle: DataBundle): | def process(self, data_bundle: DataBundle): | ||||
task_tag_dict = { | task_tag_dict = { | ||||
'XNLI':{'neutral': 0, 'entailment': 1, 'contradictory': 2, 'contradiction': 2} | |||||
'XNLI': {'neutral': 0, 'entailment': 1, 'contradictory': 2, 'contradiction': 2} | |||||
} | } | ||||
if self.task in task_tag_dict: | if self.task in task_tag_dict: | ||||
data_bundle = self._granularize(data_bundle=data_bundle, tag_map= task_tag_dict[self.task]) | |||||
data_bundle = self._granularize(data_bundle=data_bundle, tag_map=task_tag_dict[self.task]) | |||||
else: | else: | ||||
raise RuntimeError(f"Only support {task_tag_dict.keys()} task_tag_map.") | raise RuntimeError(f"Only support {task_tag_dict.keys()} task_tag_map.") | ||||
return data_bundle | return data_bundle | ||||
class MachingTruncatePipe(Pipe): #truncate sentence for bert, modify seq_len | |||||
class MachingTruncatePipe(Pipe): # truncate sentence for bert, modify seq_len | |||||
def __init__(self): | def __init__(self): | ||||
super().__init__() | super().__init__() | ||||
def process(self, data_bundle: DataBundle): | def process(self, data_bundle: DataBundle): | ||||
for name, dataset in data_bundle.datasets.items(): | for name, dataset in data_bundle.datasets.items(): | ||||
pass | pass | ||||
@@ -456,7 +462,7 @@ class MachingTruncatePipe(Pipe): #truncate sentence for bert, modify seq_len | |||||
class LCQMCBertPipe(MatchingBertPipe): | class LCQMCBertPipe(MatchingBertPipe): | ||||
def process_from_file(self, paths = None): | |||||
def process_from_file(self, paths=None): | |||||
data_bundle = LCQMCLoader().load(paths) | data_bundle = LCQMCLoader().load(paths) | ||||
data_bundle = RenamePipe(task='cn-nli-bert').process(data_bundle) | data_bundle = RenamePipe(task='cn-nli-bert').process(data_bundle) | ||||
data_bundle = self.process(data_bundle) | data_bundle = self.process(data_bundle) | ||||
@@ -465,7 +471,7 @@ class LCQMCBertPipe(MatchingBertPipe): | |||||
class BQCorpusBertPipe(MatchingBertPipe): | class BQCorpusBertPipe(MatchingBertPipe): | ||||
def process_from_file(self, paths = None): | |||||
def process_from_file(self, paths=None): | |||||
data_bundle = BQCorpusLoader().load(paths) | data_bundle = BQCorpusLoader().load(paths) | ||||
data_bundle = RenamePipe(task='cn-nli-bert').process(data_bundle) | data_bundle = RenamePipe(task='cn-nli-bert').process(data_bundle) | ||||
data_bundle = self.process(data_bundle) | data_bundle = self.process(data_bundle) | ||||
@@ -474,7 +480,7 @@ class BQCorpusBertPipe(MatchingBertPipe): | |||||
class CNXNLIBertPipe(MatchingBertPipe): | class CNXNLIBertPipe(MatchingBertPipe): | ||||
def process_from_file(self, paths = None): | |||||
def process_from_file(self, paths=None): | |||||
data_bundle = CNXNLILoader().load(paths) | data_bundle = CNXNLILoader().load(paths) | ||||
data_bundle = GranularizePipe(task='XNLI').process(data_bundle) | data_bundle = GranularizePipe(task='XNLI').process(data_bundle) | ||||
data_bundle = RenamePipe(task='cn-nli-bert').process(data_bundle) | data_bundle = RenamePipe(task='cn-nli-bert').process(data_bundle) | ||||