Browse Source

Merge pull request #228 from benbijituo/dev0.5.0

补充了两个数据集的download
tags/v0.4.10
Yige Xu GitHub 5 years ago
parent
commit
81a8a87060
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 22 additions and 1 deletions
  1. +3
    -1
      fastNLP/io/file_utils.py
  2. +9
    -0
      fastNLP/io/loader/classification.py
  3. +10
    -0
      fastNLP/io/loader/matching.py

+ 3
- 1
fastNLP/io/file_utils.py View File

@@ -89,6 +89,7 @@ DATASET_DIR = {
"mnli": "MNLI.zip", "mnli": "MNLI.zip",
"snli": "SNLI.zip", "snli": "SNLI.zip",
"qnli": "QNLI.zip", "qnli": "QNLI.zip",
"xnli": "XNLI.zip",
"sst-2": "SST-2.zip", "sst-2": "SST-2.zip",
"sst": "SST.zip", "sst": "SST.zip",
"rte": "RTE.zip", "rte": "RTE.zip",
@@ -101,7 +102,8 @@ DATASET_DIR = {
"cws-as": 'cws_as.zip', "cws-as": 'cws_as.zip',
"cws-msra": 'cws_msra.zip', "cws-msra": 'cws_msra.zip',


"chn-senti-corp":"chn_senti_corp.zip"
"chn-senti-corp" : "chn_senti_corp.zip",
"weibo-senti-100k" : "WeiboSenti100k.zip"
} }


PRETRAIN_MAP = {'elmo': PRETRAINED_ELMO_MODEL_DIR, PRETRAIN_MAP = {'elmo': PRETRAINED_ELMO_MODEL_DIR,


+ 9
- 0
fastNLP/io/loader/classification.py View File

@@ -518,3 +518,12 @@ class WeiboSenti100kLoader(Loader):
if raw_chars: if raw_chars:
ds.append(Instance(raw_chars=raw_chars, target=target)) ds.append(Instance(raw_chars=raw_chars, target=target))
return ds return ds

def download(self) -> str:
    """
    Fetch the Weibo sentiment (100k) dataset and report where it lives.

    The data is taken from https://github.com/SophonPlus/ChineseNlpCorpus/
    and is used in https://arxiv.org/abs/1906.08101.

    The work is delegated to ``self._get_dataset_path``; the automatic
    download is presumably performed there -- confirm against the base
    ``Loader`` implementation.

    :return: whatever ``self._get_dataset_path('weibo-senti-100k')`` returns
        (annotated as ``str``; presumably the local dataset directory).
    """
    # The key must match the entry registered for this dataset in DATASET_DIR.
    return self._get_dataset_path('weibo-senti-100k')

+ 10
- 0
fastNLP/io/loader/matching.py View File

@@ -377,6 +377,16 @@ class XNLILoader(Loader):
data_bundle = DataBundle(datasets=datasets) data_bundle = DataBundle(datasets=datasets)
return data_bundle return data_bundle


def download(self) -> str:
    """
    Fetch the XNLI dataset and report where it lives.

    The data is taken from https://arxiv.org/abs/1809.05053 and is used in
    https://arxiv.org/pdf/1905.05526.pdf, https://arxiv.org/pdf/1901.10125.pdf
    and https://arxiv.org/pdf/1809.05053.pdf.

    The work is delegated to ``self._get_dataset_path``; the automatic
    download is presumably performed there -- confirm against the base
    ``Loader`` implementation.

    :return: whatever ``self._get_dataset_path('xnli')`` returns
        (annotated as ``str``; presumably the local dataset directory).
    """
    # The key must match the entry registered for this dataset in DATASET_DIR.
    return self._get_dataset_path('xnli')



class BQCorpusLoader(Loader): class BQCorpusLoader(Loader):
""" """


Loading…
Cancel
Save