You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_rename.py 2.0 kB

5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. # Copyright 2019 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. import numpy as np
  16. import mindspore.dataset as ds
  17. from mindspore import log as logger
  18. DATA_DIR = ["../data/dataset/testTFBert5Rows1/5TFDatas.data"]
  19. DATA_DIR_2 = ["../data/dataset/testTFBert5Rows2/5TFDatas.data"]
  20. SCHEMA_DIR = "../data/dataset/testTFBert5Rows1/datasetSchema.json"
  21. SCHEMA_DIR_2 = "../data/dataset/testTFBert5Rows2/datasetSchema.json"
  22. def test_rename():
  23. data1 = ds.TFRecordDataset(DATA_DIR_2, SCHEMA_DIR_2, shuffle=False)
  24. data2 = ds.TFRecordDataset(DATA_DIR_2, SCHEMA_DIR_2, shuffle=False)
  25. data2 = data2.rename(input_columns=["input_ids", "segment_ids"], output_columns=["masks", "seg_ids"])
  26. data = ds.zip((data1, data2))
  27. data = data.repeat(3)
  28. num_iter = 0
  29. for _, item in enumerate(data.create_dict_iterator()):
  30. logger.info("item[mask] is {}".format(item["masks"]))
  31. np.testing.assert_equal(item["masks"], item["input_ids"])
  32. logger.info("item[seg_ids] is {}".format(item["seg_ids"]))
  33. np.testing.assert_equal(item["segment_ids"], item["seg_ids"])
  34. # need to consume the data in the buffer
  35. num_iter += 1
  36. logger.info("Number of data in data: {}".format(num_iter))
  37. assert num_iter == 15
  38. if __name__ == '__main__':
  39. logger.info('===========test Rename Repeat===========')
  40. test_rename()
  41. logger.info('\n')