Browse Source

补充了两个数据集的download

tags/v0.4.10
benbijituo 5 years ago
parent
commit
372991c03a
3 changed files with 22 additions and 1 deletion
  1. +3
    -1
      fastNLP/io/file_utils.py
  2. +9
    -0
      fastNLP/io/loader/classification.py
  3. +10
    -0
      fastNLP/io/loader/matching.py

+ 3
- 1
fastNLP/io/file_utils.py View File

@@ -89,6 +89,7 @@ DATASET_DIR = {
"mnli": "MNLI.zip",
"snli": "SNLI.zip",
"qnli": "QNLI.zip",
"xnli": "XNLI.zip",
"sst-2": "SST-2.zip",
"sst": "SST.zip",
"rte": "RTE.zip",
@@ -101,7 +102,8 @@ DATASET_DIR = {
"cws-as": 'cws_as.zip',
"cws-msra": 'cws_msra.zip',

"chn-senti-corp":"chn_senti_corp.zip"
"chn-senti-corp" : "chn_senti_corp.zip",
"weibo-senti-100k" : "WeiboSenti100k.zip"
}

PRETRAIN_MAP = {'elmo': PRETRAINED_ELMO_MODEL_DIR,


+ 9
- 0
fastNLP/io/loader/classification.py View File

@@ -518,3 +518,12 @@ class WeiboSenti100kLoader(Loader):
if raw_chars:
ds.append(Instance(raw_chars=raw_chars, target=target))
return ds

def download(self) -> str:
    """
    Automatically download the data.

    The data is taken from https://github.com/SophonPlus/ChineseNlpCorpus/
    and is used in https://arxiv.org/abs/1906.08101.

    :return: the value returned by ``self._get_dataset_path`` for the
        ``'weibo-senti-100k'`` dataset key (presumably the local directory
        holding the dataset -- confirm against ``_get_dataset_path``).
    """
    # Resolving the dataset key is delegated entirely to the loader helper.
    return self._get_dataset_path('weibo-senti-100k')

+ 10
- 0
fastNLP/io/loader/matching.py View File

@@ -377,6 +377,16 @@ class XNLILoader(Loader):
data_bundle = DataBundle(datasets=datasets)
return data_bundle

def download(self) -> str:
    """
    Automatically download the data.

    The data is taken from https://arxiv.org/abs/1809.05053 and is used in
    https://arxiv.org/pdf/1905.05526.pdf, https://arxiv.org/pdf/1901.10125.pdf
    and https://arxiv.org/pdf/1809.05053.pdf.

    :return: the value returned by ``self._get_dataset_path`` for the
        ``'xnli'`` dataset key (presumably the local directory holding the
        dataset -- confirm against ``_get_dataset_path``).
    """
    # Resolving the dataset key is delegated entirely to the loader helper.
    return self._get_dataset_path('xnli')


class BQCorpusLoader(Loader):
"""


Loading…
Cancel
Save