Browse Source

* AutoPadder skips padding and returns the contents unchanged when dtype is None

* Add an ignore_type keyword argument to DataSet.apply
tags/v0.4.10
FengZiYjun 5 years ago
parent
commit
95a72f06b9
4 changed files with 24 additions and 4 deletions
  1. +7
    -2
      fastNLP/core/dataset.py
  2. +2
    -0
      fastNLP/core/fieldarray.py
  3. +4
    -1
      test/core/test_dataset.py
  4. +11
    -1
      test/core/test_fieldarray.py

+ 7
- 2
fastNLP/core/dataset.py View File

@@ -288,6 +288,8 @@ class DataSet(object):
extra_param['is_input'] = kwargs['is_input'] extra_param['is_input'] = kwargs['is_input']
if 'is_target' in kwargs: if 'is_target' in kwargs:
extra_param['is_target'] = kwargs['is_target'] extra_param['is_target'] = kwargs['is_target']
if 'ignore_type' in kwargs:
extra_param['ignore_type'] = kwargs['ignore_type']
if new_field_name is not None: if new_field_name is not None:
if new_field_name in self.field_arrays: if new_field_name in self.field_arrays:
# overwrite the field, keep same attributes # overwrite the field, keep same attributes
@@ -296,11 +298,14 @@ class DataSet(object):
extra_param['is_input'] = old_field.is_input extra_param['is_input'] = old_field.is_input
if 'is_target' not in extra_param: if 'is_target' not in extra_param:
extra_param['is_target'] = old_field.is_target extra_param['is_target'] = old_field.is_target
if 'ignore_type' not in extra_param:
extra_param['ignore_type'] = old_field.ignore_type
self.add_field(name=new_field_name, fields=results, is_input=extra_param["is_input"], self.add_field(name=new_field_name, fields=results, is_input=extra_param["is_input"],
is_target=extra_param["is_target"])
is_target=extra_param["is_target"], ignore_type=extra_param['ignore_type'])
else: else:
self.add_field(name=new_field_name, fields=results, is_input=extra_param.get("is_input", None), self.add_field(name=new_field_name, fields=results, is_input=extra_param.get("is_input", None),
is_target=extra_param.get("is_target", None))
is_target=extra_param.get("is_target", None),
ignore_type=extra_param.get("ignore_type", False))
else: else:
return results return results




+ 2
- 0
fastNLP/core/fieldarray.py View File

@@ -83,6 +83,8 @@ class AutoPadder(PadderBase):
array = np.full((len(contents), max_len), self.pad_val, dtype=field_ele_dtype) array = np.full((len(contents), max_len), self.pad_val, dtype=field_ele_dtype)
for i, content in enumerate(contents): for i, content in enumerate(contents):
array[i][:len(content)] = content array[i][:len(content)] = content
elif field_ele_dtype is None:
array = contents # when ignore_type=True, return contents directly without padding
else: # should only be str else: # should only be str
array = np.array([content for content in contents]) array = np.array([content for content in contents])
return array return array


+ 4
- 1
test/core/test_dataset.py View File

@@ -120,6 +120,9 @@ class TestDataSetMethods(unittest.TestCase):
self.assertTrue(isinstance(res, list) and len(res) > 0) self.assertTrue(isinstance(res, list) and len(res) > 0)
self.assertTrue(res[0], 4) self.assertTrue(res[0], 4)


ds.apply(lambda ins: (len(ins["x"]), "hahaha"), new_field_name="k", ignore_type=True)
# expect no exception raised

def test_drop(self): def test_drop(self):
ds = DataSet({"x": [[1, 2, 3, 4]] * 40, "y": [[5, 6], [7, 8, 9, 0]] * 20}) ds = DataSet({"x": [[1, 2, 3, 4]] * 40, "y": [[5, 6], [7, 8, 9, 0]] * 20})
ds.drop(lambda ins: len(ins["y"]) < 3) ds.drop(lambda ins: len(ins["y"]) < 3)
@@ -170,7 +173,7 @@ class TestDataSetMethods(unittest.TestCase):
dataset.apply(split_sent, new_field_name='words', is_input=True) dataset.apply(split_sent, new_field_name='words', is_input=True)
# print(dataset) # print(dataset)


def test_add_field(self):
def test_add_field_v2(self):
ds = DataSet({"x": [3, 4]}) ds = DataSet({"x": [3, 4]})
ds.add_field('y', [['hello', 'world'], ['this', 'is', 'a', 'test']], is_input=True, is_target=True) ds.add_field('y', [['hello', 'world'], ['this', 'is', 'a', 'test']], is_input=True, is_target=True)
# ds.apply(lambda x:[x['x']]*3, is_input=True, is_target=True, new_field_name='y') # ds.apply(lambda x:[x['x']]*3, is_input=True, is_target=True, new_field_name='y')


+ 11
- 1
test/core/test_fieldarray.py View File

@@ -222,4 +222,14 @@ class TestPadder(unittest.TestCase):
[[[1, 2, 3, -100, -100], [4, 5, -100, -100, -100], [7, 8, 9, 10, -100]], [[[1, 2, 3, -100, -100], [4, 5, -100, -100, -100], [7, 8, 9, 10, -100]],
[[1, -100, -100, -100, -100], [-100, -100, -100, -100, -100], [-100, -100, -100, -100, -100]]], [[1, -100, -100, -100, -100], [-100, -100, -100, -100, -100], [-100, -100, -100, -100, -100]]],
padder(contents, None, np.int64).tolist() padder(contents, None, np.int64).tolist()
)
)

def test_None_dtype(self):
from fastNLP.core.fieldarray import AutoPadder
padder = AutoPadder()
content = [
[[1, 2, 3], [4, 5], [7, 8, 9, 10]],
[[1]]
]
ans = padder(content, None, None)
self.assertListEqual(content, ans)

Loading…
Cancel
Save