diff --git a/fastNLP/core/dataset.py b/fastNLP/core/dataset.py index f25e2cfd..4b995c94 100644 --- a/fastNLP/core/dataset.py +++ b/fastNLP/core/dataset.py @@ -288,6 +288,8 @@ class DataSet(object): extra_param['is_input'] = kwargs['is_input'] if 'is_target' in kwargs: extra_param['is_target'] = kwargs['is_target'] + if 'ignore_type' in kwargs: + extra_param['ignore_type'] = kwargs['ignore_type'] if new_field_name is not None: if new_field_name in self.field_arrays: # overwrite the field, keep same attributes @@ -296,11 +298,14 @@ class DataSet(object): extra_param['is_input'] = old_field.is_input if 'is_target' not in extra_param: extra_param['is_target'] = old_field.is_target + if 'ignore_type' not in extra_param: + extra_param['ignore_type'] = old_field.ignore_type self.add_field(name=new_field_name, fields=results, is_input=extra_param["is_input"], - is_target=extra_param["is_target"]) + is_target=extra_param["is_target"], ignore_type=extra_param['ignore_type']) else: self.add_field(name=new_field_name, fields=results, is_input=extra_param.get("is_input", None), - is_target=extra_param.get("is_target", None)) + is_target=extra_param.get("is_target", None), + ignore_type=extra_param.get("ignore_type", False)) else: return results diff --git a/fastNLP/core/fieldarray.py b/fastNLP/core/fieldarray.py index 148dfc6c..1d95dbeb 100644 --- a/fastNLP/core/fieldarray.py +++ b/fastNLP/core/fieldarray.py @@ -83,6 +83,8 @@ class AutoPadder(PadderBase): array = np.full((len(contents), max_len), self.pad_val, dtype=field_ele_dtype) for i, content in enumerate(contents): array[i][:len(content)] = content + elif field_ele_dtype is None: + array = contents # 当ignore_type=True时,直接返回contents else: # should only be str array = np.array([content for content in contents]) return array diff --git a/test/core/test_dataset.py b/test/core/test_dataset.py index 231fedd0..eb4c97e8 100644 --- a/test/core/test_dataset.py +++ b/test/core/test_dataset.py @@ -120,6 +120,9 @@ class TestDataSetMethods(unittest.TestCase): self.assertTrue(isinstance(res, list) and len(res) > 0) self.assertTrue(res[0], 4) + ds.apply(lambda ins: (len(ins["x"]), "hahaha"), new_field_name="k", ignore_type=True) + # expect no exception raised + def test_drop(self): ds = DataSet({"x": [[1, 2, 3, 4]] * 40, "y": [[5, 6], [7, 8, 9, 0]] * 20}) ds.drop(lambda ins: len(ins["y"]) < 3) @@ -170,7 +173,7 @@ class TestDataSetMethods(unittest.TestCase): dataset.apply(split_sent, new_field_name='words', is_input=True) # print(dataset) - def test_add_field(self): + def test_add_field_v2(self): ds = DataSet({"x": [3, 4]}) ds.add_field('y', [['hello', 'world'], ['this', 'is', 'a', 'test']], is_input=True, is_target=True) # ds.apply(lambda x:[x['x']]*3, is_input=True, is_target=True, new_field_name='y') diff --git a/test/core/test_fieldarray.py b/test/core/test_fieldarray.py index e3595f9a..ff1a8314 100644 --- a/test/core/test_fieldarray.py +++ b/test/core/test_fieldarray.py @@ -222,4 +222,14 @@ class TestPadder(unittest.TestCase): [[[1, 2, 3, -100, -100], [4, 5, -100, -100, -100], [7, 8, 9, 10, -100]], [[1, -100, -100, -100, -100], [-100, -100, -100, -100, -100], [-100, -100, -100, -100, -100]]], padder(contents, None, np.int64).tolist() - ) \ No newline at end of file + ) + + def test_None_dtype(self): + from fastNLP.core.fieldarray import AutoPadder + padder = AutoPadder() + content = [ + [[1, 2, 3], [4, 5], [7, 8, 9, 10]], + [[1]] + ] + ans = padder(content, None, None) + self.assertListEqual(content, ans)