You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_datasets_textfileop.py 3.5 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. import mindspore.dataset as ds
  16. from mindspore import log as logger
  17. from util import config_get_set_num_parallel_workers
  18. DATA_FILE = "../data/dataset/testTextFileDataset/1.txt"
  19. DATA_ALL_FILE = "../data/dataset/testTextFileDataset/*"
  20. def test_textline_dataset_one_file():
  21. data = ds.TextFileDataset(DATA_FILE)
  22. count = 0
  23. for i in data.create_dict_iterator():
  24. logger.info("{}".format(i["text"]))
  25. count += 1
  26. assert count == 3
  27. def test_textline_dataset_all_file():
  28. data = ds.TextFileDataset(DATA_ALL_FILE)
  29. count = 0
  30. for i in data.create_dict_iterator():
  31. logger.info("{}".format(i["text"]))
  32. count += 1
  33. assert count == 5
  34. def test_textline_dataset_totext():
  35. original_num_parallel_workers = config_get_set_num_parallel_workers(4)
  36. data = ds.TextFileDataset(DATA_ALL_FILE, shuffle=False)
  37. count = 0
  38. line = ["This is a text file.", "Another file.",
  39. "Be happy every day.", "End of file.", "Good luck to everyone."]
  40. for i in data.create_dict_iterator():
  41. strs = i["text"].item().decode("utf8")
  42. assert strs == line[count]
  43. count += 1
  44. assert count == 5
  45. # Restore configuration num_parallel_workers
  46. ds.config.set_num_parallel_workers(original_num_parallel_workers)
  47. def test_textline_dataset_num_samples():
  48. data = ds.TextFileDataset(DATA_FILE, num_samples=2)
  49. count = 0
  50. for _ in data.create_dict_iterator():
  51. count += 1
  52. assert count == 2
  53. def test_textline_dataset_distribution():
  54. data = ds.TextFileDataset(DATA_ALL_FILE, num_shards=2, shard_id=1)
  55. count = 0
  56. for _ in data.create_dict_iterator():
  57. count += 1
  58. assert count == 3
  59. def test_textline_dataset_repeat():
  60. data = ds.TextFileDataset(DATA_FILE, shuffle=False)
  61. data = data.repeat(3)
  62. count = 0
  63. line = ["This is a text file.", "Be happy every day.", "Good luck to everyone.",
  64. "This is a text file.", "Be happy every day.", "Good luck to everyone.",
  65. "This is a text file.", "Be happy every day.", "Good luck to everyone."]
  66. for i in data.create_dict_iterator():
  67. strs = i["text"].item().decode("utf8")
  68. assert strs == line[count]
  69. count += 1
  70. assert count == 9
  71. def test_textline_dataset_get_datasetsize():
  72. data = ds.TextFileDataset(DATA_FILE)
  73. size = data.get_dataset_size()
  74. assert size == 3
  75. def test_textline_dataset_to_device():
  76. data = ds.TextFileDataset(DATA_FILE, shuffle=False)
  77. data = data.to_device()
  78. data.send()
  79. if __name__ == "__main__":
  80. test_textline_dataset_one_file()
  81. test_textline_dataset_all_file()
  82. test_textline_dataset_totext()
  83. test_textline_dataset_num_samples()
  84. test_textline_dataset_distribution()
  85. test_textline_dataset_repeat()
  86. test_textline_dataset_get_datasetsize()