You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

lineage_parser.py 7.8 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """This file is used to parse lineage info."""
  16. import os
  17. from mindinsight.lineagemgr.common.exceptions.exceptions import LineageSummaryAnalyzeException, \
  18. LineageEventNotExistException, LineageEventFieldNotExistException, LineageFileNotFoundError, \
  19. MindInsightException
  20. from mindinsight.lineagemgr.common.log import logger
  21. from mindinsight.lineagemgr.common.path_parser import SummaryPathParser
  22. from mindinsight.lineagemgr.summary.file_handler import FileHandler
  23. from mindinsight.lineagemgr.summary.lineage_summary_analyzer import LineageSummaryAnalyzer
  24. from mindinsight.lineagemgr.querier.query_model import LineageObj
  25. from mindinsight.utils.exceptions import ParamValueError
# NOTE(review): this constant is not referenced in the visible part of the
# module; presumably it names the cache entry that holds lineage data (the
# same string appears as a literal in LineageOrganizer) -- confirm before use.
LINEAGE = "lineage"
  27. class SuperLineageObj:
  28. """This is an object for LineageObj and its additional info."""
  29. def __init__(self, lineage_obj: LineageObj, update_time, added_info=None):
  30. self._lineage_obj = lineage_obj
  31. self._update_time = update_time
  32. self._added_info = added_info if added_info is not None else dict()
  33. @property
  34. def lineage_obj(self):
  35. """Get lineage object."""
  36. return self._lineage_obj
  37. @property
  38. def added_info(self):
  39. """Get added info."""
  40. return self._added_info
  41. @added_info.setter
  42. def added_info(self, added_info):
  43. """Set added info."""
  44. self._added_info = added_info
  45. @property
  46. def update_time(self):
  47. """Get update time."""
  48. return self._update_time
  49. @update_time.setter
  50. def update_time(self, update_time):
  51. """Set update_time."""
  52. self._update_time = update_time
  53. class LineageParser:
  54. """Lineage parser."""
  55. def __init__(self, train_id, summary_dir, update_time=None, added_info=None):
  56. self._summary_dir = summary_dir
  57. self._train_id = train_id
  58. self._update_time = update_time
  59. self._added_info = added_info
  60. self._init_variables()
  61. self.load()
  62. @property
  63. def update_time(self):
  64. return self._update_time
  65. @update_time.setter
  66. def update_time(self, update_time):
  67. self._update_time = update_time
  68. if self._super_lineage_obj is not None:
  69. self._super_lineage_obj.update_time = update_time
  70. def _init_variables(self):
  71. """Init variables."""
  72. self._super_lineage_obj = None
  73. self._latest_filename = None
  74. self._latest_file_size = None
  75. self._cached_file_list = None
  76. def load(self):
  77. """Find and load summaries."""
  78. # get sorted lineage files
  79. lineage_files = SummaryPathParser.get_lineage_summaries(self._summary_dir, is_sorted=True)
  80. if not lineage_files:
  81. logger.info('There is no summary log file under summary_dir %s.', self._summary_dir)
  82. raise LineageFileNotFoundError(
  83. 'There is no summary log file under summary_dir.'
  84. )
  85. self._init_if_files_deleted(lineage_files)
  86. index = 0
  87. if self._latest_filename is not None:
  88. index = lineage_files.index(self._latest_filename)
  89. for filename in lineage_files[index:]:
  90. if filename != self._latest_filename:
  91. self._latest_filename = filename
  92. self._latest_file_size = 0
  93. file_path = os.path.join(self._summary_dir, filename)
  94. new_size = FileHandler(file_path).size
  95. if new_size == self._latest_file_size:
  96. continue
  97. self._latest_file_size = new_size
  98. try:
  99. self._parse_summary_log()
  100. except (LineageSummaryAnalyzeException,
  101. LineageEventNotExistException,
  102. LineageEventFieldNotExistException) as error:
  103. logger.debug("Parse file failed, file_path is %s. Detail: %s", file_path, str(error))
  104. except MindInsightException as error:
  105. logger.exception(error)
  106. logger.debug("Parse file failed, file_path is %s.", file_path)
  107. def _init_if_files_deleted(self, file_list):
  108. """Init variables if files deleted."""
  109. cached_file_list = self._cached_file_list
  110. self._cached_file_list = file_list
  111. if cached_file_list is None:
  112. return
  113. deleted_files = set(cached_file_list) - set(file_list)
  114. if deleted_files:
  115. logger.info("There are some files has been deleted, "
  116. "all files will be reloaded in path %s.", self._summary_dir)
  117. self._init_variables()
  118. def _parse_summary_log(self):
  119. """
  120. Parse the single summary log.
  121. Returns:
  122. bool, `True` if parse summary log success, else `False`.
  123. """
  124. file_path = os.path.realpath(os.path.join(self._summary_dir, self._latest_filename))
  125. lineage_info = LineageSummaryAnalyzer.get_summary_infos(file_path)
  126. user_defined_info = LineageSummaryAnalyzer.get_user_defined_info(file_path)
  127. self._update_lineage_obj(lineage_info, user_defined_info)
  128. def _update_lineage_obj(self, lineage_info, user_defined_info):
  129. """Update lineage object."""
  130. if self._super_lineage_obj is None:
  131. lineage_obj = LineageObj(
  132. self._train_id,
  133. train_lineage=lineage_info.train_lineage,
  134. evaluation_lineage=lineage_info.eval_lineage,
  135. dataset_graph=lineage_info.dataset_graph,
  136. user_defined_info=user_defined_info
  137. )
  138. self._super_lineage_obj = SuperLineageObj(lineage_obj, self.update_time, self._added_info)
  139. else:
  140. self._super_lineage_obj.lineage_obj.parse_and_update_lineage(
  141. train_lineage=lineage_info.train_lineage,
  142. evaluation_lineage=lineage_info.eval_lineage,
  143. dataset_graph=lineage_info.dataset_graph,
  144. user_defined_info=user_defined_info
  145. )
  146. @property
  147. def super_lineage_obj(self):
  148. """Get super lineage object."""
  149. return self._super_lineage_obj
  150. class LineageOrganizer:
  151. """Lineage organizer."""
  152. def __init__(self, data_manager):
  153. self._data_manager = data_manager
  154. self._super_lineage_objs = {}
  155. self._organize_from_cache()
  156. def _organize_from_cache(self):
  157. """Organize lineage objs from cache."""
  158. brief_cache = self._data_manager.get_brief_cache()
  159. cache_items = brief_cache.cache_items
  160. for relative_dir, cache_train_job in cache_items.items():
  161. try:
  162. super_lineage_obj = cache_train_job.get("lineage").super_lineage_obj
  163. self._super_lineage_objs.update({relative_dir: super_lineage_obj})
  164. except ParamValueError:
  165. logger.debug("This is no lineage info in train job %s.", relative_dir)
  166. @property
  167. def super_lineage_objs(self):
  168. """Get super lineage objects."""
  169. return self._super_lineage_objs
  170. def get_super_lineage_obj(self, relative_path):
  171. """Get super lineage object by given relative path."""
  172. return self._super_lineage_objs.get(relative_path)