You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

lineage_summary_analyzer.py 8.1 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """This module provides python APIs to get lineage summary from summary log."""
  16. import struct
  17. from collections import namedtuple
  18. from enum import Enum
  19. from google.protobuf.json_format import MessageToDict
  20. from mindinsight.datavisual.proto_files.mindinsight_lineage_pb2 import LineageEvent
  21. from mindinsight.datavisual.utils import crc32
  22. from mindinsight.lineagemgr.common.exceptions.exceptions import MindInsightException, \
  23. LineageVerificationException, LineageSummaryAnalyzeException
  24. from mindinsight.lineagemgr.common.log import logger as log
  25. from mindinsight.lineagemgr.common.validator.validate_path import safe_normalize_path
  26. from mindinsight.lineagemgr.summary.file_handler import FileHandler
  # Immutable record with one slot per lineage event kind collected from a summary log.
  LineageInfo = namedtuple(
      'LineageInfo',
      ('train_lineage', 'eval_lineage', 'dataset_graph'),
  )
  class SummaryTag(Enum):
      """
      Field names of a summary event.

      Each member's value is a plain field-name string (not a numeric
      protobuf tag); this module passes the value to ``HasField`` to test
      whether an event carries that field.
      """

      # Fields carried by a summary event.
      WALL_TIME = "wall_time"
      STEP = "step"
      VERSION = "version"
      GRAPH = "graph"
      SUMMARY = "summary"

      # Lineage-related fields.
      TRAIN_LINEAGE = "train_lineage"
      EVAL_LINEAGE = "evaluation_lineage"
      DATASET_GRAPH = "dataset_graph"
  class SummaryAnalyzer:
      """
      Sequential reader for the records stored in a summary log.

      Each record is read as: an 8-byte header, a 4-byte header CRC, the
      event body, and a 4-byte body CRC.

      Args:
          file_path (str): The path of summary log.

      Raises:
          LineageVerificationException: Raise when verification failed.
      """

      # Byte widths of the fixed-size parts of a record.
      HEADER_SIZE = 8
      HEADER_CRC_SIZE = 4
      BODY_CRC_SIZE = 4

      def __init__(self, file_path):
          self.file_handler = FileHandler(file_path)

      def load_events(self):
          """
          Lazily iterate over the events in the summary log.

          Returns:
              generator, yields one parsed event per record.
          """
          while self._has_next():
              yield self._read_event()

      def _has_next(self):
          """
          Tell whether there is still unread data in the file.

          Returns:
              bool, True while the read offset is before the end of the file.
          """
          position = self.file_handler.tell()
          return bool(position < self.file_handler.size)

      def _read_event(self):
          """
          Read the next record and parse it.

          Returns:
              LineageEvent, the event parsed from the record body.
          """
          payload = self._read_body(self._read_header())
          return LineageEvent().FromString(payload)

      def _read_header(self):
          """
          Read and verify the record header.

          Returns:
              int, the length of the event body that follows.
          """
          raw_header = self.file_handler.read(self.HEADER_SIZE)
          raw_header_crc = self.file_handler.read(self.HEADER_CRC_SIZE)
          SummaryAnalyzer._check_crc(raw_header, raw_header_crc)
          # The header is a single little-endian unsigned 64-bit integer.
          (body_len,) = struct.unpack("<Q", raw_header)
          return body_len

      def _read_body(self, body_size):
          """
          Read and verify the record body.

          Args:
              body_size (int): The size of event body.

          Returns:
              bytes, the verified event body.
          """
          raw_body = self.file_handler.read(body_size)
          raw_body_crc = self.file_handler.read(self.BODY_CRC_SIZE)
          SummaryAnalyzer._check_crc(raw_body, raw_body_crc)
          return raw_body

      @staticmethod
      def _check_crc(source_str, crc_str):
          """
          Verify a chunk of data against its stored CRC.

          Args:
              source_str (bytes): Source string in bytes.
              crc_str (bytes): CRC string of source string in bytes.

          Raises:
              LineageVerificationException: Raise when verification failed.
          """
          if crc32.CheckValueAgainstData(crc_str, source_str, len(source_str)):
              return
          failure = "The CRC verification failed."
          log.error(failure)
          raise LineageVerificationException(failure)
  class LineageSummaryAnalyzer(SummaryAnalyzer):
      """
      Summary log analyzer for lineage information.

      Args:
          file_path (str): The path of summary log.

      Raises:
          LineageSummaryAnalyzeException: If failed to get lineage information.
      """

      def __init__(self, file_path):
          # Run the path through safe_normalize_path before handing it to the base class.
          file_path = safe_normalize_path(file_path, 'lineage_summary_path', None)
          super().__init__(file_path)

      def get_latest_info(self):
          """
          Get latest lineage info in summary log file.

          Returns:
              LineageInfo, the lineage summary information. A slot is None
              when no event of that kind was found.
          """
          latest_events = {
              SummaryTag.TRAIN_LINEAGE: None,
              SummaryTag.EVAL_LINEAGE: None,
              SummaryTag.DATASET_GRAPH: None,
          }
          for event in self.load_events():
              for tag in latest_events:
                  if event.HasField(tag.value):
                      # A later event of the same kind overwrites the earlier
                      # one, so only the last one in the file is kept.
                      latest_events[tag] = event
                      break

          return LineageInfo(
              train_lineage=latest_events[SummaryTag.TRAIN_LINEAGE],
              eval_lineage=latest_events[SummaryTag.EVAL_LINEAGE],
              dataset_graph=latest_events[SummaryTag.DATASET_GRAPH],
          )

      @classmethod
      def get_summary_infos(cls, file_path):
          """
          Get lineage summary information from summary log file.

          Args:
              file_path (str): The file path of summary log.

          Returns:
              LineageInfo, the lineage summary information.

          Raises:
              LineageSummaryAnalyzeException: If failed to get lineage information.
          """
          analyzer = cls(file_path)
          try:
              lineage_info = analyzer.get_latest_info()
          except (MindInsightException, IOError) as err:
              log.error("Failed to get lineage information.")
              log.exception(err)
              # Chain explicitly so the original failure stays attached as the cause.
              raise LineageSummaryAnalyzeException() from err

          return lineage_info

      @staticmethod
      def get_user_defined_info(file_path):
          """
          Get user defined info.

          Args:
              file_path (str): The file path of summary log.

          Returns:
              list, the list of dict format user defined information
                  which converted from proto message.
          """
          # NOTE(review): unlike __init__, this path is not passed through
          # safe_normalize_path -- confirm callers validate it beforehand.
          all_user_message = []
          summary_analyzer = SummaryAnalyzer(file_path)

          for event in summary_analyzer.load_events():
              if event.HasField("user_defined_info"):
                  user_defined_info = MessageToDict(
                      event,
                      preserving_proto_field_name=True
                  ).get("user_defined_info")
                  user_dict = LineageSummaryAnalyzer._get_dict_from_proto(user_defined_info)
                  all_user_message.append(user_dict)

          return all_user_message

      @staticmethod
      def _get_dict_from_proto(user_defined_info):
          """
          Convert the dict form of a UserDefinedInfo proto message to a flat dict.

          Args:
              user_defined_info (dict): The UserDefinedInfo message after
                  conversion by MessageToDict.

          Returns:
              dict, the converted dict. Empty when the message carries no
              "user_info" entries.
          """
          user_dict = {}
          # MessageToDict leaves out empty repeated fields, so "user_info" can
          # be missing; treat that as "no entries" instead of iterating None.
          proto_items = user_defined_info.get("user_info") or []
          for proto_item in proto_items:
              if proto_item and isinstance(proto_item, dict):
                  # Each item is read as {<field name>: {key: value}}; only the
                  # first key/value pair of the first field is used.
                  first_field = list(proto_item.values())[0]
                  key, value = list(first_field.items())[0]
                  if isinstance(value, dict):
                      # A dict value is itself a converted message: recurse.
                      user_dict[key] = LineageSummaryAnalyzer._get_dict_from_proto(value)
                  else:
                      user_dict[key] = value
          return user_dict

MindInsight 为 MindSpore 提供了简单易用的调优调试能力。在训练过程中，可以将标量、张量、图像、计算图、模型超参、训练耗时等数据记录到文件中，通过 MindInsight 可视化页面进行查看及分析。