You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

timeline_analyser.py 12 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """The Timeline Analyser."""
  16. import json
  17. import os
  18. from mindinsight.profiler.analyser.base_analyser import BaseAnalyser
  19. from mindinsight.profiler.parser.container import TimelineContainer
  20. from mindinsight.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException, \
  21. ProfilerIOException
  22. from mindinsight.profiler.common.log import logger
  23. from mindinsight.profiler.common.validator.validate_path import validate_and_normalize_path
# Size threshold in bytes for the timeline display json file: the writer stops
# adding timeline items once the file has grown beyond this value.
SIZE_LIMIT = 20 * 1024 * 1024 # 20MB
  25. class TimelineAnalyser(BaseAnalyser):
  26. """
  27. Analyse timeline data from file.
  28. """
  29. __col_names__ = ['op_name', 'stream_id', 'start_time', 'duration']
  30. _output_timeline_data_file_path = 'output_timeline_data_{}.txt'
  31. _min_cycle_counter_file_path = 'min_cycle_counter_{}.txt'
  32. _display_filename = 'timeline_display_{}.json'
  33. _timeline_summary_filename = 'timeline_summary_{}.json'
  34. _timeline_meta = []
  35. _timeline_summary = {
  36. 'total_time': 0,
  37. 'num_of_streams': 0,
  38. 'num_of_ops': 0,
  39. 'op_exe_times': 0
  40. }
  41. def _load(self):
  42. """Load data according to the parsed profiling files."""
  43. def _filter(self, filter_condition):
  44. """
  45. Filter the profiling data according to the filter condition.
  46. Args:
  47. filter_condition (dict): The filter condition.
  48. """
  49. def get_display_timeline(self):
  50. """
  51. Get timeline data for UI display.
  52. Returns:
  53. json, the content of timeline data.
  54. """
  55. # Search timeline json file under profiling dir.
  56. display_filename = self._display_filename.format(self._device_id)
  57. # Check if there is a timeline json file for display
  58. file_path = os.path.join(self._profiling_dir, display_filename)
  59. file_path = validate_and_normalize_path(
  60. file_path, raise_key='Invalid timeline json path.'
  61. )
  62. timeline = []
  63. if os.path.exists(file_path):
  64. try:
  65. with open(file_path, 'r') as f_obj:
  66. timeline = json.load(f_obj)
  67. except (IOError, OSError) as err:
  68. logger.error('Error occurred when read timeline display file: %s', err)
  69. raise ProfilerIOException
  70. else:
  71. logger.info('No timeline file. Please check the output path.')
  72. return timeline
  73. def get_timeline_summary(self):
  74. """
  75. Get timeline summary information for UI display.
  76. Returns:
  77. json, the content of timeline summary information.
  78. """
  79. file_path = None
  80. summary_file_name = 'timeline_summary_{}.json'.format(self._device_id)
  81. if summary_file_name in os.listdir(self._profiling_dir):
  82. file_path = os.path.join(self._profiling_dir, summary_file_name)
  83. file_path = validate_and_normalize_path(
  84. file_path, raise_key='Invalid timeline summary path.'
  85. )
  86. timeline_summary = {}
  87. if os.path.exists(file_path):
  88. try:
  89. with open(file_path, 'r') as f_obj:
  90. timeline_summary = json.load(f_obj)
  91. except (IOError, OSError) as err:
  92. logger.error('Error occurred when read timeline summary file: %s', err)
  93. raise ProfilerIOException
  94. return timeline_summary
  95. def write_timeline(self):
  96. """Load data according to the parsed profiling files."""
  97. # Write timeline to file.
  98. logger.info('Writing timeline file...')
  99. self.write_timeline_to_json_by_limitation()
  100. logger.info('Finished file writing!')
  101. def write_timeline_to_json_by_limitation(self):
  102. """Write timeline to json by limitation."""
  103. display_filename = self._display_filename.format(self._device_id)
  104. display_file_path = os.path.join(
  105. self._profiling_dir,
  106. display_filename
  107. )
  108. display_file_path = validate_and_normalize_path(
  109. display_file_path, raise_key='Invalid timeline display json path.'
  110. )
  111. try:
  112. with open(display_file_path, 'w') as json_file:
  113. json_file.write('[')
  114. for item in self._timeline_meta:
  115. json.dump(item, json_file)
  116. file_size = os.path.getsize(display_file_path)
  117. if file_size > SIZE_LIMIT:
  118. break
  119. json_file.write(',')
  120. json_file.write(']')
  121. except (IOError, OSError) as err:
  122. logger.error('Error occurred when write timeline display file: %s', err)
  123. raise ProfilerIOException
  124. def write_timeline_summary(self):
  125. """Write timeline summary to json."""
  126. timeline_summary_file_path = os.path.join(
  127. self._profiling_dir,
  128. self._timeline_summary_filename.format(self._device_id)
  129. )
  130. timeline_summary_file_path = validate_and_normalize_path(
  131. timeline_summary_file_path, raise_key='Invalid timeline summary path.'
  132. )
  133. try:
  134. with open(timeline_summary_file_path, 'w') as json_file:
  135. json.dump(self._timeline_summary, json_file)
  136. except (IOError, OSError) as err:
  137. logger.error('Error occurred when write timeline summary file: %s', err)
  138. raise ProfilerIOException
  139. def _load_timeline_data(self):
  140. """Load timeline data from file."""
  141. file_path = os.path.join(
  142. self._profiling_dir,
  143. self._output_timeline_data_file_path.format(self._device_id)
  144. )
  145. file_path = validate_and_normalize_path(
  146. file_path, raise_key='Invalid timeline txt file path.'
  147. )
  148. if not os.path.exists(file_path):
  149. logger.error("Failed to find parsed timeline file.")
  150. raise ProfilerFileNotFoundException('parsed timeline file')
  151. timeline_list = []
  152. try:
  153. with open(file_path, 'r') as f_obj:
  154. for line in f_obj:
  155. if not line.startswith('op_name'):
  156. line_list = line.strip('\n').split(',')
  157. timeline_list.append(line_list)
  158. except (IOError, OSError) as err:
  159. logger.error('Error occurred when read timeline intermediate file: %s', err)
  160. raise ProfilerIOException
  161. return timeline_list
  162. def _parse_timeline_data(self, timeline):
  163. """Parse timeline data."""
  164. # factor to convert the time unit from 1ms to 1us for timeline display
  165. factor = 1000
  166. op_meta = TimelineContainer(timeline)
  167. timeline_dict = {}
  168. timeline_dict['name'] = op_meta.op_name
  169. timeline_dict['ph'] = 'X'
  170. timeline_dict['tid'] = op_meta.stream_id
  171. timeline_dict['ts'] = op_meta.start_time * factor
  172. dur = op_meta.duration * factor
  173. timeline_dict['dur'] = dur
  174. if op_meta.pid == 10000: # AllReduce PID
  175. timeline_dict['pid'] = 10000
  176. else:
  177. timeline_dict['pid'] = int(self._device_id)
  178. # Update total time of operator execution.
  179. self._timeline_summary['total_time'] += dur
  180. self._timeline_meta.append(timeline_dict)
  181. @staticmethod
  182. def _update_num_of_streams(timeline, stream_count_dict):
  183. """Update number of streams."""
  184. stream_id = timeline[1]
  185. if stream_id not in stream_count_dict.keys():
  186. stream_count_dict[stream_id] = 1
  187. else:
  188. stream_count_dict[stream_id] += 1
  189. def get_min_cycle_counter(self):
  190. """
  191. Get minimum cycle counter.
  192. Returns:
  193. float, the minimum value of the cycle counter.
  194. """
  195. file_path = os.path.join(
  196. self._profiling_dir,
  197. self._min_cycle_counter_file_path.format(self._device_id)
  198. )
  199. file_path = validate_and_normalize_path(
  200. file_path, raise_key='Invalid min cycle counter file path.'
  201. )
  202. if os.path.exists(file_path):
  203. try:
  204. with open(file_path, 'r') as f_obj:
  205. min_cycle_counter = f_obj.read()
  206. min_cycle_counter = float(min_cycle_counter) \
  207. if not min_cycle_counter == 'inf' else 0
  208. except (IOError, OSError) as err:
  209. logger.error('Error occurred when read minimum cycle counter: %s', err)
  210. raise ProfilerIOException
  211. else:
  212. min_cycle_counter = 0
  213. logger.info("No min cycle counter recorded.")
  214. return min_cycle_counter
  215. def init_timeline(self, all_reduce_info, framework_info):
  216. """
  217. Init timeline metadata, adding all collected info.
  218. Args:
  219. all_reduce_info (list[list]): The metadata of AllReduce operator.
  220. framework_info (dict): The framework metadata.
  221. """
  222. logger.info('Initiating timeline...')
  223. timeline_list = self._load_timeline_data()
  224. self._timeline_summary['op_exe_times'] = len(timeline_list)
  225. # Add AllReduce info to timeline temp list and sort by start time.
  226. if all_reduce_info:
  227. logger.debug('AllReduce info found. Start adding info into timeline...')
  228. timeline_list.extend(all_reduce_info)
  229. timeline_list.sort(key=lambda x: float(x[2]))
  230. # Init a dict for counting the num of streams.
  231. stream_count_dict = {}
  232. for timeline in timeline_list:
  233. self._parse_timeline_data(timeline)
  234. # Updating the collection of streams.
  235. if len(timeline) == 4:
  236. self._update_num_of_streams(timeline, stream_count_dict)
  237. # Get framework metadata.
  238. framework_obj_list = framework_info.get('object')
  239. # The length of list is the number of operators.
  240. self._timeline_summary['num_of_ops'] = len(framework_obj_list)
  241. self._add_framework_info(framework_obj_list)
  242. logger.info('Finished adding info into timeline...')
  243. # Update timeline summary info
  244. self._timeline_summary['num_of_streams'] = len(stream_count_dict.keys())
  245. def _add_framework_info(self, framework_obj_list):
  246. """
  247. Add framework info into timeline metadata.
  248. Args:
  249. framework_obj_list (list): The framework metadata.
  250. """
  251. logger.debug('Start adding framework info into timeline...')
  252. # Get the framework info that will be written into timeline.
  253. framework_info_dict = {}
  254. for framework_obj in framework_obj_list:
  255. op_name = framework_obj[0]
  256. op_type = framework_obj[1]
  257. op_full_name = framework_obj[4]
  258. op_info = framework_obj[5]
  259. framework_info_dict[op_full_name] = {
  260. 'name': op_name,
  261. 'args': {
  262. 'type': op_type,
  263. 'fullname': op_full_name
  264. }
  265. }
  266. framework_info_dict[op_full_name]['args'].update(op_info)
  267. # Insert framework info into timeline.
  268. for timeline_item in self._timeline_meta:
  269. op_full_name = timeline_item.get('name')
  270. framework_item = framework_info_dict.get(op_full_name)
  271. if framework_item:
  272. timeline_item['name'] = framework_item.get('name')
  273. timeline_item['args'] = framework_item.get('args')
  274. logger.debug('Finished adding framework info into timeline...')