Browse Source

add dataset method

tags/v1.0.0alpha
MorningForest 2 years ago
parent
commit
a3adafea34
2 changed files with 22 additions and 1 deletions
  1. +15
    -1
      fastNLP/core/dataset/dataset.py
  2. +7
    -0
      tests/core/dataset/test_dataset.py

+ 15
- 1
fastNLP/core/dataset/dataset.py View File

@@ -1037,4 +1037,18 @@ class DataSet:
self.collator.set_ignore(*field_names)
return self.collator
else:
raise ValueError(f"Only when the collate_fn is a fastNLP Collator, set_ignore() is allowed.")
raise ValueError(f"Only when the collate_fn is a fastNLP Collator, set_ignore() is allowed.")

@classmethod
def from_datasets(cls, dataset):
    """
    Convert a Huggingface ``datasets.Dataset`` into a fastNLP ``DataSet``.

    :param dataset: an already instantiated huggingface ``Dataset`` object.
    :return: a new instance of this class built from the dict returned by
        ``dataset.to_dict()``.
    :raises ValueError: if ``dataset`` is not a huggingface ``Dataset``.
    """
    # Local import: ``datasets`` is only needed when this method is called.
    from datasets import Dataset

    # Check against the huggingface ``Dataset`` class. Checking against
    # fastNLP's own ``DataSet`` here would reject every valid input.
    if not isinstance(dataset, Dataset):
        raise ValueError(f"Support huggingface dataset, but is {type(dataset)}!")

    data_dict = dataset.to_dict()
    # Build through ``cls`` so the alternate constructor also works for subclasses.
    return cls(data_dict)

+ 7
- 0
tests/core/dataset/test_dataset.py View File

@@ -522,3 +522,10 @@ class TestCase:
ins = Instance(**fields)
# simple print, that is enough.
print(ins)

def test_dataset(self):
    """Check that a huggingface ``Dataset`` is converted by ``DataSet.from_datasets``."""
    from datasets import Dataset as HuggingfaceDataset

    hf_ds = HuggingfaceDataset.from_dict({"x": ["11sxa", "1sasz"]*100, "y": [0, 1]*100})
    converted = DataSet.from_datasets(hf_ds)
    # Assert on the result so the test verifies the conversion instead of
    # only exercising it; printing alone cannot detect a wrong return type.
    assert isinstance(converted, DataSet)
    print(converted)

Loading…
Cancel
Save