You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

minddata_pipeline_analyser.py 11 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """The minddata pipeline analyser class."""
  16. import csv
  17. import json
  18. import os
  19. import sys
  20. from mindinsight.profiler.analyser.base_analyser import BaseAnalyser
  21. from mindinsight.profiler.common.exceptions.exceptions import \
  22. ProfilerPipelineOpNotExistException
  23. from mindinsight.profiler.common.log import logger
  24. from mindinsight.profiler.common.validator.validate_path import validate_and_normalize_path
  25. class MinddataPipelineAnalyser(BaseAnalyser):
  26. """
  27. The analyser for analyzing the minddata pipeline operator and queue data.
  28. Args:
  29. profiling_dir (str): The directory where the parsed profiling files are
  30. located.
  31. device_id (str): The device ID.
  32. Raises:
  33. ProfilerPathErrorException: If the profiling dir is invalid.
  34. """
  35. _col_names = ['op_id', 'op_type', 'num_workers', 'output_queue_size',
  36. 'output_queue_average_size', 'output_queue_length',
  37. 'output_queue_usage_rate', 'sample_interval', 'parent_id',
  38. 'children_id']
  39. _file_name_pipeline = 'minddata_pipeline_raw_{}.csv'
  40. _index_op_id = 0
  41. _index_op_type = 1
  42. _index_num_workers = 2
  43. _index_output_queue_size = 3
  44. _index_output_queue_average_size = 4
  45. _index_output_queue_length = 5
  46. _index_output_queue_usage_rate = 6
  47. _index_sample_interval = 7
  48. _index_parent_id = 8
  49. _index_children_id = 9
  50. def __init__(self, profiling_dir, device_id):
  51. super().__init__(profiling_dir, device_id)
  52. self._none_filter_condition_key = ['threshold', 'is_display_op_detail']
  53. self._none_sort_col_names = ['output_queue_size', 'children_id']
  54. self._op_id_index_map = self._get_op_id_index_map()
  55. def get_op_and_parent_op_info(self, op_id):
  56. """
  57. Get the operator and parent operator information by `op_id`.
  58. Args:
  59. op_id (int): The minddata pipeline operator ID.
  60. Returns:
  61. dict, the operator and parent operator information.
  62. Raises:
  63. ProfilerPipelineOpNotExistException: If the minddata pipeline
  64. operator does not exist.
  65. """
  66. index = self._op_id_index_map.get(op_id)
  67. if index is None:
  68. raise ProfilerPipelineOpNotExistException(str(op_id))
  69. op_info = self._data[index]
  70. parent_id = op_info[self._index_parent_id]
  71. parent_index = self._op_id_index_map.get(parent_id)
  72. if parent_index is None:
  73. parent_op = None
  74. queue_info = None
  75. else:
  76. parent_op_info = self._data[parent_index]
  77. parent_op = {
  78. 'op_id': parent_op_info[self._index_op_id],
  79. 'op_type': parent_op_info[self._index_op_type],
  80. 'num_workers': parent_op_info[self._index_num_workers]
  81. }
  82. queue_info = {
  83. 'output_queue_size': op_info[self._index_output_queue_size],
  84. 'output_queue_average_size':
  85. op_info[self._index_output_queue_average_size],
  86. 'output_queue_length': op_info[self._index_output_queue_length],
  87. 'output_queue_usage_rate':
  88. op_info[self._index_output_queue_usage_rate],
  89. 'sample_interval': op_info[self._index_sample_interval]
  90. }
  91. current_op = {
  92. 'op_id': op_info[self._index_op_id],
  93. 'op_type': op_info[self._index_op_type],
  94. 'num_workers': op_info[self._index_num_workers]
  95. }
  96. return {
  97. 'current_op': current_op,
  98. 'parent_op': parent_op,
  99. 'queue_info': queue_info
  100. }
  101. def _load(self):
  102. """Load data according to the parsed minddata pipeline file."""
  103. pipeline_file_path = os.path.join(
  104. self._profiling_dir,
  105. self._file_name_pipeline.format(self._device_id)
  106. )
  107. pipeline_file_path = validate_and_normalize_path(
  108. pipeline_file_path, raise_key="Invalid pipeline file path.")
  109. if not os.path.isfile(pipeline_file_path):
  110. logger.warning('The file <%s> does not exist.', pipeline_file_path)
  111. return
  112. with open(pipeline_file_path, 'r') as file:
  113. csv.field_size_limit(sys.maxsize)
  114. csv_reader = csv.reader(file)
  115. _ = next(csv_reader)
  116. for info in csv_reader:
  117. self._data.append(self._convert_field_type(info))
  118. def _filter(self, filter_condition):
  119. """
  120. Filter the profiling data according to the filter condition.
  121. Args:
  122. filter_condition (dict): The filter condition.
  123. """
  124. def _inner_filter(item: list):
  125. return self._default_filter(item, filter_condition)
  126. def _inner_map(item: list):
  127. inner_item = item[0:2]
  128. inner_item.extend(item[4:])
  129. return inner_item
  130. threshold = filter_condition.get('threshold')
  131. is_display_op_detail = filter_condition.get(
  132. 'is_display_op_detail', False
  133. )
  134. self._set_display_col_name(is_display_op_detail)
  135. filter_result = list(filter(_inner_filter, self._data))
  136. if threshold:
  137. low_threshold = threshold[1]
  138. high_threshold = threshold[0]
  139. filter_result = self._filter_outside_threshold(
  140. filter_result, low_threshold, high_threshold
  141. )
  142. if is_display_op_detail:
  143. self._result = filter_result
  144. else:
  145. self._result = list(map(_inner_map, filter_result))
  146. def _filter_outside_threshold(self, data, low_threshold, high_threshold):
  147. """
  148. Get the data outside the threshold range.
  149. Args:
  150. data (list[list]): The filtered data.
  151. low_threshold (float): The low threshold.
  152. high_threshold (float): The high threshold.
  153. Returns:
  154. list[list], the data outside the threshold range.
  155. """
  156. root_node = None
  157. leaf_nodes = []
  158. all_below_low_threshold = True
  159. all_higher_high_threshold = True
  160. result = []
  161. for item in data:
  162. parent_id = item[self._index_parent_id]
  163. if parent_id is None:
  164. root_node = item
  165. continue
  166. # current usage rate compared to the threshold
  167. cur_usage_rate = item[self._index_output_queue_usage_rate]
  168. is_low = False
  169. if cur_usage_rate < low_threshold:
  170. is_low = True
  171. else:
  172. all_below_low_threshold = False
  173. if cur_usage_rate < high_threshold:
  174. all_higher_high_threshold = False
  175. # the child node usage rate compared to the threshold
  176. child_ids = item[self._index_children_id]
  177. if not child_ids:
  178. leaf_nodes.append(item)
  179. continue
  180. child_usage_rates = [
  181. self._get_usage_rate_by_op_id(op_id) for op_id in child_ids
  182. ]
  183. is_high = True
  184. for usage_rate in child_usage_rates:
  185. if usage_rate < high_threshold:
  186. is_high = False
  187. break
  188. if is_high and is_low:
  189. result.append(item)
  190. if all_below_low_threshold:
  191. result = leaf_nodes
  192. elif all_higher_high_threshold:
  193. result = [root_node]
  194. return result
  195. def _get_usage_rate_by_op_id(self, op_id):
  196. """
  197. Gets the usage rate of the queue corresponding to the specified operator.
  198. Args:
  199. op_id (int): The pipeline operator ID.
  200. Returns:
  201. float, the usage rate of the queue corresponding to the specified
  202. operator.
  203. """
  204. index = self._op_id_index_map.get(op_id)
  205. op_info = self._data[index]
  206. return op_info[self._index_output_queue_usage_rate]
  207. def _set_display_col_name(self, is_display_op_detail):
  208. """
  209. Set the display column name according to the filter condition.
  210. Args:
  211. is_display_op_detail (bool): Whether to display the detailed operator
  212. information.
  213. """
  214. if not is_display_op_detail:
  215. self._display_col_names = self._col_names[0:2]
  216. self._display_col_names.extend(self._col_names[4:])
  217. def _convert_field_type(self, row):
  218. """
  219. Convert the field type of minddata pipeline file to the specific type.
  220. Args:
  221. row (list[str]): One row data from parsed data.
  222. Returns:
  223. list[Union[str, int, float]], the converted data.
  224. """
  225. return [
  226. int(row[self._index_op_id]),
  227. row[self._index_op_type],
  228. int(row[self._index_num_workers]),
  229. json.loads(row[self._index_output_queue_size])
  230. if row[self._index_output_queue_size] else None,
  231. float(row[self._index_output_queue_average_size])
  232. if row[self._index_output_queue_average_size] else None,
  233. int(row[self._index_output_queue_length])
  234. if row[self._index_output_queue_length] else None,
  235. float(row[self._index_output_queue_usage_rate])
  236. if row[self._index_output_queue_usage_rate] else None,
  237. int(row[self._index_sample_interval]),
  238. int(row[self._index_parent_id])
  239. if row[self._index_parent_id] else None,
  240. json.loads(row[self._index_children_id])
  241. if row[self._index_children_id] else None
  242. ]
  243. def _get_op_id_index_map(self):
  244. """
  245. Get the map of the operator id and index in data.
  246. Returns:
  247. dict, the map of the operator id and index in data.
  248. """
  249. the_map = {}
  250. for index, op_info in enumerate(self._data):
  251. the_map[op_info[self._index_op_id]] = index
  252. return the_map