@@ -89,6 +89,7 @@ DATASET_DIR = { | |||||
"mnli": "MNLI.zip", | "mnli": "MNLI.zip", | ||||
"snli": "SNLI.zip", | "snli": "SNLI.zip", | ||||
"qnli": "QNLI.zip", | "qnli": "QNLI.zip", | ||||
"xnli": "XNLI.zip", | |||||
"sst-2": "SST-2.zip", | "sst-2": "SST-2.zip", | ||||
"sst": "SST.zip", | "sst": "SST.zip", | ||||
"rte": "RTE.zip", | "rte": "RTE.zip", | ||||
@@ -101,7 +102,8 @@ DATASET_DIR = { | |||||
"cws-as": 'cws_as.zip', | "cws-as": 'cws_as.zip', | ||||
"cws-msra": 'cws_msra.zip', | "cws-msra": 'cws_msra.zip', | ||||
"chn-senti-corp":"chn_senti_corp.zip" | |||||
"chn-senti-corp" : "chn_senti_corp.zip", | |||||
"weibo-senti-100k" : "WeiboSenti100k.zip" | |||||
} | } | ||||
PRETRAIN_MAP = {'elmo': PRETRAINED_ELMO_MODEL_DIR, | PRETRAIN_MAP = {'elmo': PRETRAINED_ELMO_MODEL_DIR, | ||||
@@ -518,3 +518,12 @@ class WeiboSenti100kLoader(Loader): | |||||
if raw_chars: | if raw_chars: | ||||
ds.append(Instance(raw_chars=raw_chars, target=target)) | ds.append(Instance(raw_chars=raw_chars, target=target)) | ||||
return ds | return ds | ||||
def download(self) -> str:
    """
    Automatically download the Weibo-Senti-100k dataset.

    The data is taken from https://github.com/SophonPlus/ChineseNlpCorpus/
    and is used in https://arxiv.org/abs/1906.08101.

    :return: whatever ``self._get_dataset_path('weibo-senti-100k')``
        returns; presumably the local directory holding the dataset
        (that helper is defined outside this block -- confirm there).
    """
    # The dataset key must match the 'weibo-senti-100k' entry of DATASET_DIR.
    return self._get_dataset_path('weibo-senti-100k')
@@ -377,6 +377,16 @@ class XNLILoader(Loader): | |||||
data_bundle = DataBundle(datasets=datasets) | data_bundle = DataBundle(datasets=datasets) | ||||
return data_bundle | return data_bundle | ||||
def download(self) -> str:
    """
    Automatically download the XNLI dataset.

    The data is taken from https://arxiv.org/abs/1809.05053 and is used in
    https://arxiv.org/pdf/1905.05526.pdf, https://arxiv.org/pdf/1901.10125.pdf
    and https://arxiv.org/pdf/1809.05053.pdf.

    :return: whatever ``self._get_dataset_path('xnli')`` returns; presumably
        the local directory holding the dataset (that helper is defined
        outside this block -- confirm there).
    """
    # The dataset key must match the 'xnli' entry of DATASET_DIR.
    return self._get_dataset_path('xnli')
class BQCorpusLoader(Loader): | class BQCorpusLoader(Loader): | ||||
""" | """ | ||||