diff --git a/fastNLP/io/file_utils.py b/fastNLP/io/file_utils.py
index 6661397b..022af0ac 100644
--- a/fastNLP/io/file_utils.py
+++ b/fastNLP/io/file_utils.py
@@ -89,6 +89,7 @@ DATASET_DIR = {
     "mnli": "MNLI.zip",
     "snli": "SNLI.zip",
     "qnli": "QNLI.zip",
+    "xnli": "XNLI.zip",
     "sst-2": "SST-2.zip",
     "sst": "SST.zip",
     "rte": "RTE.zip",
@@ -101,7 +102,8 @@ DATASET_DIR = {
     "cws-as": 'cws_as.zip',
     "cws-msra": 'cws_msra.zip',
 
-    "chn-senti-corp":"chn_senti_corp.zip"
+    "chn-senti-corp": "chn_senti_corp.zip",
+    "weibo-senti-100k": "WeiboSenti100k.zip",
 }
 
 PRETRAIN_MAP = {'elmo': PRETRAINED_ELMO_MODEL_DIR,
diff --git a/fastNLP/io/loader/classification.py b/fastNLP/io/loader/classification.py
index 51660db5..ca9b6107 100644
--- a/fastNLP/io/loader/classification.py
+++ b/fastNLP/io/loader/classification.py
@@ -518,3 +518,15 @@ class WeiboSenti100kLoader(Loader):
                 if raw_chars:
                     ds.append(Instance(raw_chars=raw_chars, target=target))
         return ds
+
+    def download(self) -> str:
+        """
+        Automatically download the dataset. The data is taken from
+        https://github.com/SophonPlus/ChineseNlpCorpus/ and is used in
+        https://arxiv.org/abs/1906.08101
+
+        :return: the value of ``self._get_dataset_path('weibo-senti-100k')``,
+            presumably the local directory holding the dataset
+        """
+        output_dir = self._get_dataset_path('weibo-senti-100k')
+        return output_dir
diff --git a/fastNLP/io/loader/matching.py b/fastNLP/io/loader/matching.py
index df60618b..b9724126 100644
--- a/fastNLP/io/loader/matching.py
+++ b/fastNLP/io/loader/matching.py
@@ -377,6 +377,19 @@ class XNLILoader(Loader):
         data_bundle = DataBundle(datasets=datasets)
         return data_bundle
 
+    def download(self) -> str:
+        """
+        Automatically download the dataset. The data is taken from
+        https://arxiv.org/abs/1809.05053 and is used in
+        https://arxiv.org/pdf/1905.05526.pdf, https://arxiv.org/pdf/1901.10125.pdf
+        and https://arxiv.org/pdf/1809.05053.pdf
+
+        :return: the value of ``self._get_dataset_path('xnli')``,
+            presumably the local directory holding the dataset
+        """
+        output_dir = self._get_dataset_path('xnli')
+        return output_dir
+
 
 class BQCorpusLoader(Loader):
     """