
_lineage_writer.py 9.4 kB

# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Lineage writer to record lineage to summary log."""
import copy
import json
import os
import random
import shutil

from ._summary_record import LineageSummary
from .base import Metadata

CHILDREN_0 = {
    'dataset_dir': '/home/anthony/MindData/tests/dataset/data/testMnistData',
    'op_module': 'minddata.dataengine.datasets',
    'num_shards': None,
    'num_parallel_workers': None,
    'shuffle': None,
    'op_type': 'MnistDataset',
    'shard_id': None,
    'num_samples': 100,
    'sampler': {
        'sampler_module': 'minddata.dataengine.samplers',
        'sampler_name': 'RandomSampler',
        'replacement': True,
        'num_samples': 100
    },
    'children': []
}

CHILDREN_1 = {
    'op_type': 'MapDataset',
    'op_module': 'minddata.dataengine.datasets',
    'num_parallel_workers': None,
    'input_columns': ['image'],
    'operations': [],
    'children': []
}

CHILDREN_2 = {
    'op_type': 'MapDataset',
    'op_module': 'minddata.dataengine.datasets',
    'num_parallel_workers': None,
    'output_columns': [None],
    'input_columns': ['label'],
    'operations': [{
        'tensor_op_module': 'minddata.transforms.c_transforms',
        'tensor_op_name': 'OneHot',
        'num_classes': 10
    }],
    'children': []
}

CHILDREN_3 = {
    'op_type': 'ShuffleDataset',
    'op_module': 'minddata.dataengine.datasets',
    'num_parallel_workers': None,
    'buffer_size': 10,
    'children': []
}
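
# Note: generate_graph() below chains copies of these templates leaf-to-root,
# producing BatchDataset -> ShuffleDataset -> MapDataset(OneHot on 'label')
# -> MapDataset(ops on 'image') -> source dataset, where each node holds its
# upstream node in 'children'.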


def _get_operations(rescale=0.003921, normalize_weight=0.48):
    """Get operations."""
    operation_0 = {
        'tensor_op_module': 'minddata.transforms.c_transforms',
        'tensor_op_name': 'RandomCrop',
        'weight': [32, 32, 4, 4, 4, 4],
        'padding_mode': "constant",
        'pad_if_needed': False,
        'fill_value': 0
    }
    operation_1 = {
        'tensor_op_module': 'minddata.transforms.c_transforms',
        'tensor_op_name': 'Rescale',
        'rescale': rescale,
        'shift': 0,
        'num_classes': 10
    }
    operation_2 = {
        'tensor_op_module': 'minddata.transforms.c_transforms',
        'tensor_op_name': 'Normalize',
        'weights': [normalize_weight]
    }
    return [operation_0, operation_1, operation_2]


def generate_graph(dataset_name='MnistDataset', batch_size=16, buffer_size=10,
                   rescale=0.003921, num_samples=100, normalize_weight=0.48):
    """Generate dataset graph."""
    # Deep copy so the nested 'sampler' update below cannot mutate the
    # module-level CHILDREN_0 template across calls.
    children_0 = copy.deepcopy(CHILDREN_0)
    children_0['op_type'] = dataset_name
    children_0['num_samples'] = num_samples
    children_0['sampler']['num_samples'] = num_samples

    children_1 = dict(CHILDREN_1)
    children_1['operations'] = _get_operations(rescale, normalize_weight)
    children_1['children'] = [children_0]

    children_2 = dict(CHILDREN_2)
    children_2['children'] = [children_1]

    children_3 = dict(CHILDREN_3)
    # buffer_size configures the ShuffleDataset node, so it is set on
    # children_3 rather than on the MapDataset copy.
    children_3['buffer_size'] = buffer_size
    children_3['children'] = [children_2]

    dataset_graph = {
        'num_parallel_workers': None,
        'op_type': 'BatchDataset',
        'op_module': 'minddata.dataengine.datasets',
        'drop_remainder': True,
        'batch_size': batch_size,
        'children': [children_3]
    }
    return dataset_graph


def get_train_args():
    """Get default train args."""
    train_args = dict()
    train_args[Metadata.train_network] = "LeNet5"
    train_args[Metadata.loss] = 0.01
    train_args[Metadata.learning_rate] = 0.01
    train_args[Metadata.optimizer] = "Momentum"
    train_args[Metadata.loss_function] = "SoftmaxCrossEntropyWithLogits"
    train_args[Metadata.epoch] = 500
    train_args[Metadata.parallel_mode] = ""
    train_args[Metadata.device_num] = 1
    train_args[Metadata.batch_size] = 32
    train_args[Metadata.train_dataset_path] = "/home/data/train"
    train_args[Metadata.train_dataset_size] = 301234
    train_args[Metadata.model_path] = "/home/demo/demo_model.pkl"
    train_args[Metadata.model_size] = 100 * 1024 * 1024
    train_args["user_defined_info"] = {"Version_train": "v1"}
    train_args["dataset_graph"] = generate_graph()
    return train_args


def get_eval_args():
    """Get default eval args."""
    eval_args = dict()
    eval_args[Metadata.metrics] = json.dumps({"acc": 0.88})
    eval_args[Metadata.valid_dataset_path] = "/home/data/test"
    eval_args[Metadata.valid_dataset_size] = 5000
    eval_args["user_defined_info"] = {"Version_eval": "v1"}
    return eval_args


class LineageWriter:
    """Lineage writer."""

    def __init__(self, base_dir, summary_type=None):
        # Default to writing both train and eval lineage when no explicit
        # list of summary types is given.
        self._summary_type = summary_type if summary_type is not None else ['train', 'eval']
        self.base_dir = base_dir
        self._init_summary_base_dir()

    def _init_summary_base_dir(self, clean_base_dir=False):
        """Init summary base dir."""
        if clean_base_dir and os.path.exists(self.base_dir):
            shutil.rmtree(self.base_dir)
        if not os.path.exists(self.base_dir):
            os.makedirs(self.base_dir)

    def _create_event(self, lineage_summary, args, mode='train'):
        """Create event."""
        if mode == 'train':
            lineage_summary.record_train_lineage(args)
            lineage_summary.record_user_defined_info(args["user_defined_info"])
            lineage_summary.record_dataset_graph(args["dataset_graph"])
        else:
            lineage_summary.record_evaluation_lineage(args)
            lineage_summary.record_user_defined_info(args["user_defined_info"])

    def _get_random_train_args(self):
        """Get random train args."""
        network = ['ResNet', 'LeNet5', 'AlexNet']
        optimizer = ['SGD', 'Adam', 'Momentum']
        loss_function = ["SoftmaxCrossEntropyWithLogits", "CrossEntropyLoss"]
        dataset = ['MindDataset', 'MnistDataset', 'Cifar10Dataset']
        train_args = dict()
        train_args[Metadata.learning_rate] = random.uniform(0.001, 0.005)
        train_args[Metadata.loss] = random.uniform(0.001, 0.005)
        train_args[Metadata.epoch] = random.choice([100, 200, 300])
        train_args[Metadata.batch_size] = random.choice([16, 32, 64])
        train_args[Metadata.model_size] = random.randint(350, 450) * 1024 * 1024
        train_args[Metadata.train_network] = random.choice(network)
        train_args[Metadata.optimizer] = random.choice(optimizer)
        train_args[Metadata.device_num] = random.choice([1, 2, 4, 6, 8])
        train_args[Metadata.loss_function] = random.choice(loss_function)
        train_args[Metadata.train_dataset_size] = random.choice([56, 67, 78]) * 10000
        dataset_graph = generate_graph(
            dataset_name=random.choice(dataset),
            batch_size=random.choice([8, 16, 32, 64]),
            buffer_size=random.choice([10, 20, 30]),
            rescale=random.choice([0.003921, 0.005632, 0.0078, 0.005678]),
            num_samples=random.choice([100, 200, 300]),
            normalize_weight=random.choice([0.20, 0.50])  # random.uniform(0.2, 0.5)
        )
        train_args["dataset_graph"] = dataset_graph
        return train_args

    def _get_random_eval_args(self):
        """Get random eval args."""
        eval_args = dict()
        eval_args[Metadata.valid_dataset_size] = random.choice([13, 24, 28]) * 100
        eval_args[Metadata.metrics] = json.dumps({'Accuracy': random.uniform(0.85, 0.96)})
        return eval_args

    def create_summary_for_one_train(self, train_id, mode='train', random_mode=True, user_defined_params=None):
        """Create summary for one train."""
        summary_dir = os.path.join(self.base_dir, train_id)
        if not os.path.exists(summary_dir):
            os.makedirs(summary_dir)
        lineage_summary = LineageSummary(summary_dir)
        args = {}
        if mode == 'train':
            args = get_train_args()
            params = self._get_random_train_args() if random_mode else {}
            args.update(params)
        elif mode == 'eval':
            args = get_eval_args()
            params = self._get_random_eval_args() if random_mode else {}
            args.update(params)
        if user_defined_params is not None:
            args.update(user_defined_params)
        self._create_event(lineage_summary, args, mode)

    def create_summaries(self, train_id_prefix='train_', start_id=1, train_job_num=1, random_mode=True, params=None):
        """Create summaries for several trains."""
        if params is None:
            params = {}
        train_ids = [f'./{train_id_prefix}{i}' for i in range(start_id, start_id + train_job_num)]
        for train_id in train_ids:
            user_defined_params = params.get(train_id, {})
            for mode in self._summary_type:
                self.create_summary_for_one_train(train_id, mode, random_mode, user_defined_params.get(mode))
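

# --- Usage sketch (illustrative addition, not part of the original module) ---
# A minimal driver for the writer above. The base directory
# './summary_records' and the job count are assumptions for illustration;
# since this module uses relative imports, it must be run inside its package
# (e.g. via `python -m <package>._lineage_writer`).
if __name__ == '__main__':
    writer = LineageWriter(base_dir='./summary_records')
    # For each of train_1 .. train_3, writes one train and one eval lineage
    # event (with randomized hyperparameters) under ./summary_records/.
    writer.create_summaries(train_id_prefix='train_', start_id=1, train_job_num=3)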