* add ignore_type in DataSet.apply (tags/v0.4.10)
@@ -288,6 +288,8 @@ class DataSet(object): | |||||
extra_param['is_input'] = kwargs['is_input'] | extra_param['is_input'] = kwargs['is_input'] | ||||
if 'is_target' in kwargs: | if 'is_target' in kwargs: | ||||
extra_param['is_target'] = kwargs['is_target'] | extra_param['is_target'] = kwargs['is_target'] | ||||
if 'ignore_type' in kwargs: | |||||
extra_param['ignore_type'] = kwargs['ignore_type'] | |||||
if new_field_name is not None: | if new_field_name is not None: | ||||
if new_field_name in self.field_arrays: | if new_field_name in self.field_arrays: | ||||
# overwrite the field, keep same attributes | # overwrite the field, keep same attributes | ||||
@@ -296,11 +298,14 @@ class DataSet(object): | |||||
extra_param['is_input'] = old_field.is_input | extra_param['is_input'] = old_field.is_input | ||||
if 'is_target' not in extra_param: | if 'is_target' not in extra_param: | ||||
extra_param['is_target'] = old_field.is_target | extra_param['is_target'] = old_field.is_target | ||||
if 'ignore_type' not in extra_param: | |||||
extra_param['ignore_type'] = old_field.ignore_type | |||||
self.add_field(name=new_field_name, fields=results, is_input=extra_param["is_input"], | self.add_field(name=new_field_name, fields=results, is_input=extra_param["is_input"], | ||||
is_target=extra_param["is_target"]) | |||||
is_target=extra_param["is_target"], ignore_type=extra_param['ignore_type']) | |||||
else: | else: | ||||
self.add_field(name=new_field_name, fields=results, is_input=extra_param.get("is_input", None), | self.add_field(name=new_field_name, fields=results, is_input=extra_param.get("is_input", None), | ||||
is_target=extra_param.get("is_target", None)) | |||||
is_target=extra_param.get("is_target", None), | |||||
ignore_type=extra_param.get("ignore_type", False)) | |||||
else: | else: | ||||
return results | return results | ||||
@@ -83,6 +83,8 @@ class AutoPadder(PadderBase): | |||||
array = np.full((len(contents), max_len), self.pad_val, dtype=field_ele_dtype) | array = np.full((len(contents), max_len), self.pad_val, dtype=field_ele_dtype) | ||||
for i, content in enumerate(contents): | for i, content in enumerate(contents): | ||||
array[i][:len(content)] = content | array[i][:len(content)] = content | ||||
elif field_ele_dtype is None: | |||||
array = contents # 当ignore_type=True时,直接返回contents | |||||
else: # should only be str | else: # should only be str | ||||
array = np.array([content for content in contents]) | array = np.array([content for content in contents]) | ||||
return array | return array | ||||
@@ -120,6 +120,9 @@ class TestDataSetMethods(unittest.TestCase): | |||||
self.assertTrue(isinstance(res, list) and len(res) > 0) | self.assertTrue(isinstance(res, list) and len(res) > 0) | ||||
self.assertTrue(res[0], 4) | self.assertTrue(res[0], 4) | ||||
ds.apply(lambda ins: (len(ins["x"]), "hahaha"), new_field_name="k", ignore_type=True) | |||||
# expect no exception raised | |||||
def test_drop(self): | def test_drop(self): | ||||
ds = DataSet({"x": [[1, 2, 3, 4]] * 40, "y": [[5, 6], [7, 8, 9, 0]] * 20}) | ds = DataSet({"x": [[1, 2, 3, 4]] * 40, "y": [[5, 6], [7, 8, 9, 0]] * 20}) | ||||
ds.drop(lambda ins: len(ins["y"]) < 3) | ds.drop(lambda ins: len(ins["y"]) < 3) | ||||
@@ -170,7 +173,7 @@ class TestDataSetMethods(unittest.TestCase): | |||||
dataset.apply(split_sent, new_field_name='words', is_input=True) | dataset.apply(split_sent, new_field_name='words', is_input=True) | ||||
# print(dataset) | # print(dataset) | ||||
def test_add_field(self): | |||||
def test_add_field_v2(self): | |||||
ds = DataSet({"x": [3, 4]}) | ds = DataSet({"x": [3, 4]}) | ||||
ds.add_field('y', [['hello', 'world'], ['this', 'is', 'a', 'test']], is_input=True, is_target=True) | ds.add_field('y', [['hello', 'world'], ['this', 'is', 'a', 'test']], is_input=True, is_target=True) | ||||
# ds.apply(lambda x:[x['x']]*3, is_input=True, is_target=True, new_field_name='y') | # ds.apply(lambda x:[x['x']]*3, is_input=True, is_target=True, new_field_name='y') | ||||
@@ -222,4 +222,14 @@ class TestPadder(unittest.TestCase): | |||||
[[[1, 2, 3, -100, -100], [4, 5, -100, -100, -100], [7, 8, 9, 10, -100]], | [[[1, 2, 3, -100, -100], [4, 5, -100, -100, -100], [7, 8, 9, 10, -100]], | ||||
[[1, -100, -100, -100, -100], [-100, -100, -100, -100, -100], [-100, -100, -100, -100, -100]]], | [[1, -100, -100, -100, -100], [-100, -100, -100, -100, -100], [-100, -100, -100, -100, -100]]], | ||||
padder(contents, None, np.int64).tolist() | padder(contents, None, np.int64).tolist() | ||||
) | |||||
) | |||||
def test_None_dtype(self): | |||||
from fastNLP.core.fieldarray import AutoPadder | |||||
padder = AutoPadder() | |||||
content = [ | |||||
[[1, 2, 3], [4, 5], [7, 8, 9, 10]], | |||||
[[1]] | |||||
] | |||||
ans = padder(content, None, None) | |||||
self.assertListEqual(content, ans) |