# Copyright 2020 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ """Op compute time files parser.""" import os from tabulate import tabulate from mindinsight.profiler.common._utils import fwrite_format from mindinsight.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException from mindinsight.profiler.common.log import logger from mindinsight.profiler.common.validator.validate_path import validate_and_normalize_path from mindinsight.profiler.parser.container import HWTSContainer class OPComputeTimeParser: """ Join hwts info and framework info, get op time info, and output to the result file. Args: hwts_output_file (str): The file path of hwts_output_file. Such as: './output_format_data_hwts_0.txt". output_filename (str): The output data file path and name. Such as: './output_op_compute_time_0.txt'. op_task_info (dict): The task and op relation info. The format: {task_id, [opname, stream_id, block dim]}. """ _dst_file_title = 'title:op compute time' _dst_file_column_title = ['op_name', 'compute_time(ms)', 'stream_id'] _timeline_file_title = 'title:timeline info' _timeline_file_column_title = ['op_name', 'stream_id', 'start_time', 'duration'] def __init__(self, hwts_output_file, output_filename, op_task_info, output_path, device_id): hwts_output_file = validate_and_normalize_path( hwts_output_file, raise_key='Invalid hwts output file path.' ) self._hwts_output_file = hwts_output_file self._output_filename = output_filename self._op_task_info = op_task_info self._output_path = output_path self._device_id = device_id self._min_cycle_counter = float("inf") def _get_op_task_id_map(self): """ Read hwts data file, get the task time info. Returns: list: all hwts task time info. """ op_map_result = [] hwts_list = [] if not os.path.exists(self._hwts_output_file): logger.error('The hwts output file does not exist.') raise ProfilerFileNotFoundException('hwts output file') with open(self._hwts_output_file, 'r') as data_file: lines = data_file.readlines() for line in lines: if line.startswith("Start of task") or line.startswith("End of task"): line_split = line.split() container = HWTSContainer(line_split) hwts_list.append(container) # hwts op map by taskId for hwts in hwts_list: if hwts.task_id in self._op_task_info.keys(): hwts.op_name = self._op_task_info[hwts.task_id] op_map_result.append(hwts) return op_map_result def execute(self): """Execute the parser, compute all op, get op time, and write it to the output file.""" # Calculate the execution time of operators, # and update the minimum cycle counter. tmp_result_data = self._calculate_op_execution_time() # Convert time units from nanoseconds to milliseconds. # The unit of the cycle counter is 10 nanoseconds. op_name_time_dict = {} op_name_stream_dict = {} op_name_count_dict = {} op_name_task_dict = {} op_name_start_time = {} self._convert_op_time_unit( tmp_result_data, op_name_time_dict, op_name_stream_dict, op_name_count_dict, op_name_task_dict, op_name_start_time ) result_data = [] for op_name, time in op_name_time_dict.items(): if op_name in op_name_stream_dict.keys(): stream_id = op_name_stream_dict[op_name] avg_time = time / op_name_count_dict[op_name] result_data.append([op_name, avg_time, stream_id]) timeline_data = [] for op_name, time in op_name_time_dict.items(): if op_name in op_name_stream_dict.keys(): stream_id = op_name_stream_dict[op_name] start_time_list = op_name_start_time.get(op_name) for (start_time, duration) in start_time_list: timeline_data.append([op_name, stream_id, start_time, duration]) # Write the metadata of operators into the file, # including operator name, average time, and stream id. self._write_op_time_into_file(result_data) # Write the timeline data into file, # including operator name, stream id, start time, and duration. self._write_timeline_data_into_file(timeline_data) # Write the minimum cycle counter into the file. self.write_min_cycle_counter_to_file() def _write_op_time_into_file(self, result_data): """ Write the metadata of operators into the file, including op name, average time, and stream id. Args: result_data (list): The metadata to be written into the file. [ ['op_name_1', 'avg_time_1', 'stream_id_1'], ['op_name_2', 'avg_time_2', 'stream_id_2'], [...] ] """ result_data.sort(key=lambda x: x[0]) total_time = 0 for item in result_data: total_time += item[1] result_data.append(["total op", total_time, 0]) fwrite_format(self._output_filename, data_source=self._dst_file_title, is_start=True) fwrite_format(self._output_filename, data_source=tabulate(result_data, self._dst_file_column_title, tablefmt='simple')) def _write_timeline_data_into_file(self, timeline_data): """ Write the timeline information into the file, including operator name, stream id, start time and duration. Args: timeline_data (list): The metadata to be written into the file. [ ['op_name_1', 'stream_id_1', 'start_time_1', 'durarion_1'], ['op_name_2', 'stream_id_2', 'start_time_2', 'durarion_2'], [...] ] """ # sorted by start times timeline_data.sort(key=lambda x: float(x[2])) filename = 'output_timeline_data_{}.txt'.format(self._device_id) file_path = os.path.join(self._output_path, filename) file_path = validate_and_normalize_path(file_path, raise_key='Invalid file path of timeline data.') # write to file fwrite_format(file_path, data_source=self._timeline_file_title, is_start=True) fwrite_format(file_path, data_source=tabulate( timeline_data, self._timeline_file_column_title, tablefmt='simple' )) def _calculate_op_execution_time(self): """ Calculate the execution time of each operator. Returns: list, including the intermediate data of op execution time. """ tmp_result_data = [] op_map_list = self._get_op_task_id_map() cur_index = 0 length = len(op_map_list) min_cycle_counter = float("inf") while cur_index < length: if cur_index + 1 == length: break op_start = op_map_list[cur_index] op_end = op_map_list[cur_index + 1] if op_start.status == "Start" and op_end.status == "End" \ and op_start.op_name == op_end.op_name: op_start.duration = op_end.cycle_counter - op_start.cycle_counter tmp_result_data.append(op_start) cur_index += 2 if not op_start.op_name.startswith("assign"): min_cycle_counter = min(min_cycle_counter, op_start.cycle_counter) else: cur_index += 1 # Update the value of minimum cycle counter. self._min_cycle_counter = min_cycle_counter return tmp_result_data def _convert_op_time_unit(self, op_data_list, op_name_time_dict, op_name_stream_dict, op_name_count_dict, op_name_task_dict, op_name_start_time): """ Calculate the execution time of operator and convert it into millisecond. Args: op_data_list (list): The list of operator metadata. op_name_time_dict (dict): The mapping relation of operator name and its execution time. op_name_stream_dict (dict): The mapping relation of operator name and its stream id. op_name_count_dict (dict): The mapping relation of operator name and its count. op_name_task_dict (dict): The mapping relation of operator name and its task id. op_name_start_time (dict): The mapping relation of operator name and its start time. """ factor = 1e5 for item in op_data_list: op_name = item.op_name # Unit conversion: converting the cycle counter into ms. op_start_time_str = str((item.cycle_counter - self._min_cycle_counter) / factor) op_duration = item.duration / factor op_duration_str = str(item.duration / factor) if op_name in op_name_time_dict.keys(): op_name_time_dict[op_name] += op_duration if item.task_id == op_name_task_dict[op_name]: op_name_count_dict[op_name] += 1 op_name_start_time[op_name].append( (op_start_time_str, op_duration_str) ) else: op_name_time_dict[op_name] = op_duration op_name_stream_dict[op_name] = item.stream_id op_name_task_dict[op_name] = item.task_id op_name_count_dict[op_name] = 1 op_name_start_time[op_name] = [] op_name_start_time[op_name].append( (op_start_time_str, op_duration_str) ) def write_min_cycle_counter_to_file(self): """Write minimum cycle counter into a txt file.""" min_cycle_counter = self._min_cycle_counter file_name = 'min_cycle_counter_' + self._device_id + '.txt' file_path = os.path.join(self._output_path, file_name) file_path = validate_and_normalize_path( file_path, raise_key='Invalid min cycle counter file path.' ) with open(file_path, 'w') as file: file.write(str(min_cycle_counter))