From 372991c03a65c02c680e64fe54064da972188831 Mon Sep 17 00:00:00 2001 From: benbijituo Date: Fri, 20 Sep 2019 11:23:50 +0800 Subject: [PATCH] =?UTF-8?q?=E8=A1=A5=E5=85=85=E4=BA=86=E4=B8=A4=E4=B8=AA?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E9=9B=86=E7=9A=84download?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fastNLP/io/file_utils.py | 4 +++- fastNLP/io/loader/classification.py | 9 +++++++++ fastNLP/io/loader/matching.py | 10 ++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/fastNLP/io/file_utils.py b/fastNLP/io/file_utils.py index 6661397b..022af0ac 100644 --- a/fastNLP/io/file_utils.py +++ b/fastNLP/io/file_utils.py @@ -89,6 +89,7 @@ DATASET_DIR = { "mnli": "MNLI.zip", "snli": "SNLI.zip", "qnli": "QNLI.zip", + "xnli": "XNLI.zip", "sst-2": "SST-2.zip", "sst": "SST.zip", "rte": "RTE.zip", @@ -101,7 +102,8 @@ DATASET_DIR = { "cws-as": 'cws_as.zip', "cws-msra": 'cws_msra.zip', - "chn-senti-corp":"chn_senti_corp.zip" + "chn-senti-corp" : "chn_senti_corp.zip", + "weibo-senti-100k" : "WeiboSenti100k.zip" } PRETRAIN_MAP = {'elmo': PRETRAINED_ELMO_MODEL_DIR, diff --git a/fastNLP/io/loader/classification.py b/fastNLP/io/loader/classification.py index 51660db5..ca9b6107 100644 --- a/fastNLP/io/loader/classification.py +++ b/fastNLP/io/loader/classification.py @@ -518,3 +518,12 @@ class WeiboSenti100kLoader(Loader): if raw_chars: ds.append(Instance(raw_chars=raw_chars, target=target)) return ds + + def download(self) -> str: + """ + Automatically download the dataset, which is taken from https://github.com/SophonPlus/ChineseNlpCorpus/ + and is used in https://arxiv.org/abs/1906.08101 + :return: the value returned by ``self._get_dataset_path('weibo-senti-100k')`` + """ + output_dir = self._get_dataset_path('weibo-senti-100k') + return output_dir diff --git a/fastNLP/io/loader/matching.py b/fastNLP/io/loader/matching.py index df60618b..b9724126 100644 --- a/fastNLP/io/loader/matching.py +++ b/fastNLP/io/loader/matching.py @@ -377,6 +377,16 @@ class XNLILoader(Loader): data_bundle = DataBundle(datasets=datasets) return data_bundle + 
def download(self) -> str: + """ + Automatically download the dataset, which is taken from https://arxiv.org/abs/1809.05053 + It is used in https://arxiv.org/pdf/1905.05526.pdf https://arxiv.org/pdf/1901.10125.pdf + and https://arxiv.org/pdf/1809.05053.pdf + :return: the value returned by ``self._get_dataset_path('xnli')`` + """ + output_dir = self._get_dataset_path('xnli') + return output_dir + class BQCorpusLoader(Loader): """