You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

optime_parser.py 11 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """Op compute time files parser."""
  16. import os
  17. from tabulate import tabulate
  18. from mindinsight.profiler.common._utils import fwrite_format
  19. from mindinsight.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException
  20. from mindinsight.profiler.common.log import logger
  21. from mindinsight.profiler.common.validator.validate_path import validate_and_normalize_path
  22. from mindinsight.profiler.parser.container import HWTSContainer
  23. class OPComputeTimeParser:
  24. """
  25. Join hwts info and framework info, get op time info, and output to the result file.
  26. Args:
  27. hwts_output_file (str): The file path of hwts_output_file. Such as: './output_format_data_hwts_0.txt".
  28. output_filename (str): The output data file path and name. Such as: './output_op_compute_time_0.txt'.
  29. op_task_info (dict): The task and op relation info. The format: {task_id, [opname, stream_id, block dim]}.
  30. """
  31. _dst_file_title = 'title:op compute time'
  32. _dst_file_column_title = ['op_name', 'compute_time(ms)', 'stream_id']
  33. _timeline_file_title = 'title:timeline info'
  34. _timeline_file_column_title = ['op_name', 'stream_id', 'start_time', 'duration']
  35. def __init__(self, hwts_output_file, output_filename, op_task_info,
  36. output_path, device_id):
  37. hwts_output_file = validate_and_normalize_path(
  38. hwts_output_file, raise_key='Invalid hwts output file path.'
  39. )
  40. self._hwts_output_file = hwts_output_file
  41. self._output_filename = output_filename
  42. self._op_task_info = op_task_info
  43. self._output_path = output_path
  44. self._device_id = device_id
  45. self._min_cycle_counter = float("inf")
  46. def _get_op_task_id_map(self):
  47. """
  48. Read hwts data file, get the task time info.
  49. Returns:
  50. list: all hwts task time info.
  51. """
  52. op_map_result = []
  53. hwts_list = []
  54. if not os.path.exists(self._hwts_output_file):
  55. logger.error('The hwts output file does not exist.')
  56. raise ProfilerFileNotFoundException('hwts output file')
  57. with open(self._hwts_output_file, 'r') as data_file:
  58. lines = data_file.readlines()
  59. for line in lines:
  60. if line.startswith("Start of task") or line.startswith("End of task"):
  61. line_split = line.split()
  62. container = HWTSContainer(line_split)
  63. hwts_list.append(container)
  64. # hwts op map by taskId
  65. for hwts in hwts_list:
  66. if hwts.task_id in self._op_task_info.keys():
  67. hwts.op_name = self._op_task_info[hwts.task_id]
  68. op_map_result.append(hwts)
  69. return op_map_result
  70. def execute(self):
  71. """Execute the parser, compute all op, get op time, and write it to the output file."""
  72. # Calculate the execution time of operators,
  73. # and update the minimum cycle counter.
  74. tmp_result_data = self._calculate_op_execution_time()
  75. # Convert time units from nanoseconds to milliseconds.
  76. # The unit of the cycle counter is 10 nanoseconds.
  77. op_name_time_dict = {}
  78. op_name_stream_dict = {}
  79. op_name_count_dict = {}
  80. op_name_task_dict = {}
  81. op_name_start_time = {}
  82. self._convert_op_time_unit(
  83. tmp_result_data, op_name_time_dict, op_name_stream_dict,
  84. op_name_count_dict, op_name_task_dict, op_name_start_time
  85. )
  86. result_data = []
  87. for op_name, time in op_name_time_dict.items():
  88. if op_name in op_name_stream_dict.keys():
  89. stream_id = op_name_stream_dict[op_name]
  90. avg_time = time / op_name_count_dict[op_name]
  91. result_data.append([op_name, avg_time, stream_id])
  92. timeline_data = []
  93. for op_name, time in op_name_time_dict.items():
  94. if op_name in op_name_stream_dict.keys():
  95. stream_id = op_name_stream_dict[op_name]
  96. start_time_list = op_name_start_time.get(op_name)
  97. for (start_time, duration) in start_time_list:
  98. timeline_data.append([op_name, stream_id, start_time, duration])
  99. # Write the metadata of operators into the file,
  100. # including operator name, average time, and stream id.
  101. self._write_op_time_into_file(result_data)
  102. # Write the timeline data into file,
  103. # including operator name, stream id, start time, and duration.
  104. self._write_timeline_data_into_file(timeline_data)
  105. # Write the minimum cycle counter into the file.
  106. self.write_min_cycle_counter_to_file()
  107. def _write_op_time_into_file(self, result_data):
  108. """
  109. Write the metadata of operators into the file, including
  110. op name, average time, and stream id.
  111. Args:
  112. result_data (list): The metadata to be written into the file.
  113. [
  114. ['op_name_1', 'avg_time_1', 'stream_id_1'],
  115. ['op_name_2', 'avg_time_2', 'stream_id_2'],
  116. [...]
  117. ]
  118. """
  119. result_data.sort(key=lambda x: x[0])
  120. total_time = 0
  121. for item in result_data:
  122. total_time += item[1]
  123. result_data.append(["total op", total_time, 0])
  124. fwrite_format(self._output_filename, data_source=self._dst_file_title, is_start=True)
  125. fwrite_format(self._output_filename, data_source=tabulate(result_data,
  126. self._dst_file_column_title,
  127. tablefmt='simple'))
  128. def _write_timeline_data_into_file(self, timeline_data):
  129. """
  130. Write the timeline information into the file, including
  131. operator name, stream id, start time and duration.
  132. Args:
  133. timeline_data (list): The metadata to be written into the file.
  134. [
  135. ['op_name_1', 'stream_id_1', 'start_time_1', 'durarion_1'],
  136. ['op_name_2', 'stream_id_2', 'start_time_2', 'durarion_2'],
  137. [...]
  138. ]
  139. """
  140. # sorted by start times
  141. timeline_data.sort(key=lambda x: float(x[2]))
  142. filename = 'output_timeline_data_{}.txt'.format(self._device_id)
  143. file_path = os.path.join(self._output_path, filename)
  144. file_path = validate_and_normalize_path(file_path, raise_key='Invalid file path of timeline data.')
  145. # write to file
  146. fwrite_format(file_path, data_source=self._timeline_file_title, is_start=True)
  147. fwrite_format(file_path, data_source=tabulate(
  148. timeline_data, self._timeline_file_column_title, tablefmt='simple'
  149. ))
  150. def _calculate_op_execution_time(self):
  151. """
  152. Calculate the execution time of each operator.
  153. Returns:
  154. list, including the intermediate data of op execution time.
  155. """
  156. tmp_result_data = []
  157. op_map_list = self._get_op_task_id_map()
  158. cur_index = 0
  159. length = len(op_map_list)
  160. min_cycle_counter = float("inf")
  161. while cur_index < length:
  162. if cur_index + 1 == length:
  163. break
  164. op_start = op_map_list[cur_index]
  165. op_end = op_map_list[cur_index + 1]
  166. if op_start.status == "Start" and op_end.status == "End" \
  167. and op_start.op_name == op_end.op_name:
  168. op_start.duration = op_end.cycle_counter - op_start.cycle_counter
  169. tmp_result_data.append(op_start)
  170. cur_index += 2
  171. if not op_start.op_name.startswith("assign"):
  172. min_cycle_counter = min(min_cycle_counter, op_start.cycle_counter)
  173. else:
  174. cur_index += 1
  175. # Update the value of minimum cycle counter.
  176. self._min_cycle_counter = min_cycle_counter
  177. return tmp_result_data
  178. def _convert_op_time_unit(self, op_data_list, op_name_time_dict, op_name_stream_dict,
  179. op_name_count_dict, op_name_task_dict, op_name_start_time):
  180. """
  181. Calculate the execution time of operator and convert it into millisecond.
  182. Args:
  183. op_data_list (list): The list of operator metadata.
  184. op_name_time_dict (dict): The mapping relation of operator name and its execution time.
  185. op_name_stream_dict (dict): The mapping relation of operator name and its stream id.
  186. op_name_count_dict (dict): The mapping relation of operator name and its count.
  187. op_name_task_dict (dict): The mapping relation of operator name and its task id.
  188. op_name_start_time (dict): The mapping relation of operator name and its start time.
  189. """
  190. factor = 1e5
  191. for item in op_data_list:
  192. op_name = item.op_name
  193. # Unit conversion: converting the cycle counter into ms.
  194. op_start_time_str = str((item.cycle_counter - self._min_cycle_counter) / factor)
  195. op_duration = item.duration / factor
  196. op_duration_str = str(item.duration / factor)
  197. if op_name in op_name_time_dict.keys():
  198. op_name_time_dict[op_name] += op_duration
  199. if item.task_id == op_name_task_dict[op_name]:
  200. op_name_count_dict[op_name] += 1
  201. op_name_start_time[op_name].append(
  202. (op_start_time_str, op_duration_str)
  203. )
  204. else:
  205. op_name_time_dict[op_name] = op_duration
  206. op_name_stream_dict[op_name] = item.stream_id
  207. op_name_task_dict[op_name] = item.task_id
  208. op_name_count_dict[op_name] = 1
  209. op_name_start_time[op_name] = []
  210. op_name_start_time[op_name].append(
  211. (op_start_time_str, op_duration_str)
  212. )
  213. def write_min_cycle_counter_to_file(self):
  214. """Write minimum cycle counter into a txt file."""
  215. min_cycle_counter = self._min_cycle_counter
  216. file_name = 'min_cycle_counter_' + self._device_id + '.txt'
  217. file_path = os.path.join(self._output_path, file_name)
  218. file_path = validate_and_normalize_path(
  219. file_path, raise_key='Invalid min cycle counter file path.'
  220. )
  221. with open(file_path, 'w') as file:
  222. file.write(str(min_cycle_counter))