| @@ -15,12 +15,13 @@ | |||||
| """The StepTraceAnalyser analyser class.""" | """The StepTraceAnalyser analyser class.""" | ||||
| import csv | import csv | ||||
| from mindinsight.datavisual.utils.tools import to_int | |||||
| from mindinsight.profiler.analyser.base_analyser import BaseAnalyser | from mindinsight.profiler.analyser.base_analyser import BaseAnalyser | ||||
| from mindinsight.profiler.common.exceptions.exceptions import ProfilerParamValueErrorException, \ | from mindinsight.profiler.common.exceptions.exceptions import ProfilerParamValueErrorException, \ | ||||
| ProfilerFileNotFoundException, StepNumNotSupportedException | ProfilerFileNotFoundException, StepNumNotSupportedException | ||||
| from mindinsight.profiler.common.log import logger as log | from mindinsight.profiler.common.log import logger as log | ||||
| from mindinsight.profiler.common.util import query_latest_trace_time_file, get_field_value, \ | from mindinsight.profiler.common.util import query_latest_trace_time_file, get_field_value, \ | ||||
| get_summary_for_step_trace | |||||
| get_summary_for_step_trace, to_millisecond | |||||
| class StepTraceAnalyser(BaseAnalyser): | class StepTraceAnalyser(BaseAnalyser): | ||||
| @@ -53,6 +54,7 @@ class StepTraceAnalyser(BaseAnalyser): | |||||
| if condition is None: | if condition is None: | ||||
| condition = {} | condition = {} | ||||
| filter_condition = condition.get('filter_condition', {}) | filter_condition = condition.get('filter_condition', {}) | ||||
| log.info("Receive query request. %s", filter_condition) | |||||
| self._validate_filter_condition(filter_condition) | self._validate_filter_condition(filter_condition) | ||||
| self._result = {'size': self._size} | self._result = {'size': self._size} | ||||
| self._filter(filter_condition) | self._filter(filter_condition) | ||||
| @@ -64,12 +66,14 @@ class StepTraceAnalyser(BaseAnalyser): | |||||
| Query for all reduce info. | Query for all reduce info. | ||||
| Returns: | Returns: | ||||
| list[dict], each item is the reduce info for one step, the reduce info is format like: | |||||
| {stream_id: List[Tuple(start_point, end_point, duration, field_name)]}. | |||||
| list[dict], reduce information. Each item is the reduce info for one step. | |||||
| The reduce info is format like: | |||||
| {stream_id: List[Tuple(start_point, end_point, duration, field_name)]}. | |||||
| """ | """ | ||||
| reduce_infos = [] | reduce_infos = [] | ||||
| for row_info in self._data[:-1]: | for row_info in self._data[:-1]: | ||||
| reduce_info = self._get_reduce_time_in_order(row_info, 'systime') | |||||
| row_info_dict = self._get_info_dict_from_row_data(row_info, 'systime') | |||||
| reduce_info = self._get_reduce_time_in_order(row_info_dict) | |||||
| reduce_infos.append(reduce_info) | reduce_infos.append(reduce_info) | ||||
| return reduce_infos | return reduce_infos | ||||
| @@ -117,11 +121,16 @@ class StepTraceAnalyser(BaseAnalyser): | |||||
def _construct_time_point(self, name, start, duration):
    """
    Construct one time point of the training trace graph.

    Args:
        name (str): The display name of the time point.
        start (Union[int, float]): The start offset of the time point.
        duration (Union[int, float]): The duration of the time point.

    Returns:
        dict, the time point keyed by the UI attribute names, with start and
        duration rounded to 4 decimal places. An empty dict is returned when
        start or duration is not a non-negative number.
    """
    # Keep the positive form of the check so that values which compare False
    # against 0 (for example NaN) are rejected as well.
    if not (start >= 0 and duration >= 0):
        log.warning("Invalid point info: "
                    "name: %s, start: %s, duration: %s", name, start, duration)
        return {}
    return {
        self._attr_ui_name: name,
        self._attr_ui_start: round(start, 4),
        self._attr_ui_duration: round(duration, 4),
    }
| def _get_step_details(self, step_id, time_type='realtime'): | def _get_step_details(self, step_id, time_type='realtime'): | ||||
| @@ -137,59 +146,105 @@ class StepTraceAnalyser(BaseAnalyser): | |||||
| if step_id is None: | if step_id is None: | ||||
| step_id = 0 | step_id = 0 | ||||
| row_info = self._data[step_id - 1] | row_info = self._data[step_id - 1] | ||||
| start_point = get_field_value(row_info, 'start_point', self.__column__, time_type) | |||||
| total = get_field_value(row_info, 'total', self.__column__, time_type) | |||||
| iteration_interval = get_field_value(row_info, 'iteration_interval', self.__column__, | |||||
| time_type) | |||||
| fp_point = get_field_value(row_info, 'fp_point', self.__column__, time_type) | |||||
| fp_and_bp = get_field_value(row_info, 'fp_and_bp', self.__column__, time_type) | |||||
| bp_point = get_field_value(row_info, 'bp_point', self.__column__, time_type) | |||||
| tail = get_field_value(row_info, 'tail', self.__column__, time_type) | |||||
| row_info_dict = self._get_info_dict_from_row_data(row_info, time_type) | |||||
| # first line only contains total time | # first line only contains total time | ||||
| first_line = [self._construct_time_point('', 0, total)] | |||||
| first_line = [self._construct_time_point('', 0, row_info_dict.get('total', 0))] | |||||
| # second line contains iteration_interval, fp_and_bp and tail | # second line contains iteration_interval, fp_and_bp and tail | ||||
| second_line = [ | |||||
| self._construct_time_point('', 0, iteration_interval), | |||||
| self._construct_time_point('fp_and_bp', fp_point - start_point, fp_and_bp), | |||||
| self._construct_time_point('', bp_point - start_point, tail), | |||||
| ] | |||||
| second_line = self._get_main_proc_points(row_info_dict) | |||||
| # construct reduces lines | # construct reduces lines | ||||
| reduce_lines = self._construct_reduce_lines(row_info, time_type) | |||||
| reduce_lines = self._construct_reduce_lines(row_info_dict) | |||||
| graph = [first_line, second_line] | graph = [first_line, second_line] | ||||
| graph.extend(reduce_lines) | graph.extend(reduce_lines) | ||||
| self._result['training_trace_graph'] = graph | self._result['training_trace_graph'] = graph | ||||
| def _get_reduce_time_in_order(self, row_info, time_type): | |||||
| """Get reduce time in order.""" | |||||
def _get_info_dict_from_row_data(self, row_info, time_type):
    """
    Convert one row of step trace data into a dict keyed by column name.

    Args:
        row_info (list[str]): Step info, the values correspond to `__column__`.
        time_type (str): The value type. `realtime` converts every value with
            `to_millisecond`; any other value keeps the integer produced by `to_int`.

    Returns:
        dict, step trace information. The keys come from `__column__`,
        without `step_num`.
    """
    as_realtime = time_type == 'realtime'
    # `step_num` is left out of the result, so it is never passed to `to_int`.
    return {
        column: to_millisecond(to_int(raw, column)) if as_realtime else to_int(raw, column)
        for column, raw in zip(self.__column__, row_info)
        if column != 'step_num'
    }
def _get_main_proc_points(self, row_info_dict):
    """
    Build the time points for iteration_interval, fp_and_bp and tail.

    Args:
        row_info_dict (dict): Step trace information.

    Returns:
        list[dict], the three time points in the order iteration_interval,
        fp_and_bp, tail. Missing keys are treated as 0.
    """
    step_start = row_info_dict.get('start_point', 0)
    fp_offset = row_info_dict.get('fp_point', 0) - step_start
    bp_offset = row_info_dict.get('bp_point', 0) - step_start
    # (display name, start offset relative to the step start, duration key)
    layout = (
        ('', 0, 'iteration_interval'),
        ('fp_and_bp', fp_offset, 'fp_and_bp'),
        ('', bp_offset, 'tail'),
    )
    return [
        self._construct_time_point(label, offset, row_info_dict.get(duration_key, 0))
        for label, offset, duration_key in layout
    ]
def _get_reduce_time_in_order(self, row_info_dict):
    """
    Collect the reduce events of one step, grouped by stream and sorted by end point.

    Args:
        row_info_dict (dict): Step trace information.

    Returns:
        dict, sorted reduce information. The reduce info is format like:
        {stream_id: List[Tuple(start_point, end_point, duration, field_name)]}
    """
    events_by_stream = {}
    for column_name in self.__column__:
        # Only the duration columns (`stream_<id>_...` without a `point` suffix) name an event.
        if not column_name.startswith('stream_') or column_name.endswith('point'):
            continue
        begin = row_info_dict.get(column_name + '_start_point', 0)
        finish = row_info_dict.get(column_name + '_end_point', 0)
        elapsed = row_info_dict.get(column_name, 0)
        # An event with any falsy component (missing or 0) is dropped.
        if not begin or not finish or not elapsed:
            log.info("Reduce event missing value.")
            continue
        stream_id = column_name.split('_', 2)[1]
        events_by_stream.setdefault(stream_id, []).append((begin, finish, elapsed, column_name))

    for stream_events in events_by_stream.values():
        stream_events.sort(key=lambda event: event[1])
    return events_by_stream
| def _construct_reduce_lines(self, row_info, time_type): | |||||
| """Contruct first line in detailed graph.""" | |||||
| def _construct_reduce_lines(self, row_info_dict): | |||||
| """ | |||||
| Construct the reduce lines in detailed graph. | |||||
| Args: | |||||
| row_info_dict (dict): Step trace information. | |||||
| Returns: | |||||
| list, list of reduce information of each stream. Each item is a list of time points. | |||||
| """ | |||||
| reduce_lines = [] | reduce_lines = [] | ||||
| start_point = get_field_value(row_info, 'start_point', self.__column__, time_type) | |||||
| fp_point = get_field_value(row_info, 'fp_point', self.__column__, time_type) | |||||
| end_point = get_field_value(row_info, 'end_point', self.__column__, time_type) | |||||
| reduce_info = self._get_reduce_time_in_order(row_info, time_type) | |||||
| start_point = row_info_dict.get('start_point', 0) | |||||
| fp_point = row_info_dict.get('fp_point', 0) | |||||
| end_point = row_info_dict.get('end_point', 0) | |||||
| reduce_info = self._get_reduce_time_in_order(row_info_dict) | |||||
| # construct time point for each line | # construct time point for each line | ||||
| for _, reduce_events in reduce_info.items(): | for _, reduce_events in reduce_info.items(): | ||||
| current_line = self._construct_reduce_line( | current_line = self._construct_reduce_line( | ||||
| @@ -199,7 +254,19 @@ class StepTraceAnalyser(BaseAnalyser): | |||||
| return reduce_lines | return reduce_lines | ||||
| def _construct_reduce_line(self, start_point, end_point, fp_point, reduce_events): | def _construct_reduce_line(self, start_point, end_point, fp_point, reduce_events): | ||||
| """Construct list of time points for reduce line.""" | |||||
| """ | |||||
| Construct list of time points for reduce line. | |||||
| Args: | |||||
| start_point (int): The start point of current step. | |||||
| end_point (int): The end point of current step. | |||||
| fp_point (int): The fp point of current step. | |||||
| reduce_events (list[Tuple]): The reduce information of current step. Each item | |||||
| contains the start, end, duration and name of one reduce event. | |||||
| Returns: | |||||
| list[dict], list of time points. | |||||
| """ | |||||
| current_line = [] | current_line = [] | ||||
| previous_start = fp_point | previous_start = fp_point | ||||
| for start, end, duration, field_name in reduce_events: | for start, end, duration, field_name in reduce_events: | ||||
| @@ -265,4 +332,4 @@ class StepTraceAnalyser(BaseAnalyser): | |||||
| if proc_name is None or isinstance(proc_name, str) and proc_name in accept_param: | if proc_name is None or isinstance(proc_name, str) and proc_name in accept_param: | ||||
| return | return | ||||
| log.error("Invalid param %s in request. Acceptable value is %s.", error_name, accept_param) | log.error("Invalid param %s in request. Acceptable value is %s.", error_name, accept_param) | ||||
| raise ProfilerParamValueErrorException("Invalid proc_name.") | |||||
| raise ProfilerParamValueErrorException(f"Invalid {error_name}.") | |||||
| @@ -21,6 +21,9 @@ import os | |||||
| from mindinsight.datavisual.utils.tools import to_int | from mindinsight.datavisual.utils.tools import to_int | ||||
| # one sys count takes 10 ns, 1 ms has 100000 system count | |||||
| PER_MS_SYSCNT = 100000 | |||||
| def analyse_device_list_from_profiler_dir(profiler_dir): | def analyse_device_list_from_profiler_dir(profiler_dir): | ||||
| """ | """ | ||||
| @@ -116,25 +119,28 @@ def calculate_percent(partial, total): | |||||
| return f'{percent}%' | return f'{percent}%' | ||||
def to_millisecond(sys_count, limit=4):
    """
    Translate system count to millisecond.

    Args:
        sys_count (int): The system count value.
        limit (int): The number of decimal places kept in the result. Default: 4.

    Returns:
        float, `sys_count` divided by `PER_MS_SYSCNT`, rounded to `limit` decimal places.
    """
    milliseconds = sys_count / PER_MS_SYSCNT
    return round(milliseconds, limit)
def get_field_value(row_info, field_name, header, time_type='realtime'):
    """
    Extract the value of one field from a row.

    Args:
        row_info (list): The list of data info in one row.
        field_name (str): The name in header.
        header (list[str]): The list of field names.
        time_type (str): The type of value, `realtime` or `systime`. Default: `realtime`.

    Returns:
        Union[int, float], the field value converted by `to_int`. For `realtime`
        the value is further converted by `to_millisecond`.
    """
    # The position of the field in the header selects the column in the row.
    raw_value = row_info[header.index(field_name)]
    sys_count = to_int(raw_value, field_name)
    return to_millisecond(sys_count) if time_type == 'realtime' else sys_count
| @@ -38,11 +38,12 @@ class StepTraceParser: | |||||
| Args: | Args: | ||||
| input_dir (str): The directory that contains original step trace data. | input_dir (str): The directory that contains original step trace data. | ||||
| output_file_path (str): The output file path. | output_file_path (str): The output file path. | ||||
| job_id (int): The job id used to define the start of new step. Default: 0. | |||||
| skip_first_step (bool): Whether skip the first step or not. | skip_first_step (bool): Whether skip the first step or not. | ||||
| """ | """ | ||||
| _event_size = 20 | _event_size = 20 | ||||
| def __init__(self, input_dir, output_file_path, job_id, skip_first_step=False): | |||||
| def __init__(self, input_dir, output_file_path, job_id=0, skip_first_step=False): | |||||
| self._input_dir = input_dir | self._input_dir = input_dir | ||||
| self._output_path = output_file_path | self._output_path = output_file_path | ||||
| self._job_id = job_id | self._job_id = job_id | ||||
| @@ -125,7 +125,7 @@ class Profiler: | |||||
| self._withfullpath = check_bool(is_show_op_path, 'is_show_op_path') | self._withfullpath = check_bool(is_show_op_path, 'is_show_op_path') | ||||
| self._profiling_job_id = job_id | self._profiling_job_id = job_id | ||||
| # add job id env through user input later | # add job id env through user input later | ||||
| self._job_id_env = None | |||||
| self._job_id_env = 0 | |||||
| self._start_time = int(time.time() * 10000000) | self._start_time = int(time.time() * 10000000) | ||||
| logger.info("Profiling: profiling start time: %d", self._start_time) | logger.info("Profiling: profiling start time: %d", self._start_time) | ||||
| @@ -0,0 +1,20 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """ST for profiler.""" | |||||
| import os | |||||
# Resource directory, resolved relative to this package: three levels up, then utils/resource.
RAW_DATA_BASE = os.path.realpath(
    os.path.join(os.path.dirname(__file__), '..', '..', '..', 'utils', 'resource'))
# Sub-directories of the resource directory.
RAW_DATA = os.path.realpath(os.path.join(RAW_DATA_BASE, 'JOB1'))
PROFILER_DIR = os.path.realpath(os.path.join(RAW_DATA_BASE, 'profiler'))
| @@ -0,0 +1,45 @@ | |||||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """The st config.""" | |||||
| import os | |||||
| import shutil | |||||
| import sys | |||||
| import tempfile | |||||
| import pytest | |||||
| from tests.utils import mindspore | |||||
| sys.modules['mindspore'] = mindspore | |||||
| BASE_SUMMARY_DIR = tempfile.mkdtemp(prefix='test_profiler_summary_dir_base_') | |||||
@pytest.fixture(scope="session")
def create_summary_dir():
    """
    Provide a clean base summary directory for the profiler tests.

    The directory at `BASE_SUMMARY_DIR` is removed if it exists, recreated
    before the tests run, and removed again afterwards.
    """
    # Access bits shifted into the owner position of the mode (presumably 0o700 on POSIX).
    owner_mode = (os.R_OK | os.W_OK | os.X_OK) << 6
    try:
        if os.path.exists(BASE_SUMMARY_DIR):
            shutil.rmtree(BASE_SUMMARY_DIR)
        if not os.path.exists(BASE_SUMMARY_DIR):
            os.mkdir(BASE_SUMMARY_DIR, mode=owner_mode)
        yield
    finally:
        # Clean up even when directory preparation or a test failed.
        if os.path.exists(BASE_SUMMARY_DIR):
            shutil.rmtree(BASE_SUMMARY_DIR)
| @@ -0,0 +1,176 @@ | |||||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """ | |||||
| Function: | |||||
| Test profiler to watch the performance of training. | |||||
| Usage: | |||||
| pytest tests/st/func/profiler | |||||
| """ | |||||
| import os | |||||
| from unittest import mock, TestCase | |||||
| import pytest | |||||
| from mindinsight.profiler.analyser.analyser_factory import AnalyserFactory | |||||
| from mindinsight.profiler.common.exceptions.exceptions import StepNumNotSupportedException, \ | |||||
| ProfilerParamValueErrorException | |||||
| from mindinsight.profiler.profiling import Profiler, FrameworkParser | |||||
| from tests.st.func.profiler import RAW_DATA_BASE | |||||
| from tests.st.func.profiler.conftest import BASE_SUMMARY_DIR | |||||
@pytest.mark.usefixtures('create_summary_dir')
class TestProfilerAnalyse(TestCase):
    """Test the step trace analyser against parsed profiler output."""
    JOB_ID = 'JOB3'

    @classmethod
    def setup_class(cls):
        """Generate the parsed files once for the whole class."""
        cls.step_trace_file = 'step_trace_raw_1_detail_time.csv'
        cls.generate_parsed_files()

    def setUp(self):
        """Create a step trace analyser before each test."""
        factory = AnalyserFactory.instance()
        self.step_trace_analyser = factory.get_analyser('step_trace', self.profiler, '1')

    @classmethod
    def generate_parsed_files(cls):
        """Run the profiler analysis on the raw data to produce the parsed files."""
        summary_dir = os.path.join(BASE_SUMMARY_DIR, 'normal_run')
        cls.summary_dir = summary_dir
        cls.profiler = os.path.join(summary_dir, 'profiler')
        # Point the parser and the profiler at the test resources.
        FrameworkParser._raw_data_dir = RAW_DATA_BASE
        if not os.path.exists(summary_dir):
            os.makedirs(summary_dir)
        Profiler._base_profiling_container_path = os.path.join(RAW_DATA_BASE, 'container')
        log_base_patch = mock.patch(
            'mindinsight.profiler.profiling.PROFILING_LOG_BASE_PATH', RAW_DATA_BASE)
        with log_base_patch:
            profiler = Profiler(subgraph='all', is_detail=True, is_show_op_path=False,
                                output_path=summary_dir, job_id=cls.JOB_ID)
            profiler.analyse()

    @pytest.mark.level0
    @pytest.mark.env_single
    @pytest.mark.platform_x86_cpu
    @pytest.mark.platform_arm_ascend_training
    @pytest.mark.platform_x86_gpu_training
    @pytest.mark.platform_x86_ascend_training
    def test_step_trace_file_exist(self):
        """Test the step trace file has been generated."""
        generated = os.listdir(self.profiler)
        assert len(generated) == 9
        assert self.step_trace_file in generated

    @pytest.mark.level0
    @pytest.mark.env_single
    @pytest.mark.platform_x86_cpu
    @pytest.mark.platform_arm_ascend_training
    @pytest.mark.platform_x86_gpu_training
    @pytest.mark.platform_x86_ascend_training
    def test_graph_api(self):
        """Test the training trace graph returned in step mode."""
        filter_condition = {'mode': 'step', 'step_id': 0}
        res = self.step_trace_analyser.query({'filter_condition': filter_condition})
        assert res['size'] == 322
        graph = res['training_trace_graph']
        assert len(graph) == 13
        expected_last_line = [
            {'name': '', 'start': 0.2038, 'duration': 118.1667},
            {'name': 'stream_540_parallel_0', 'start': 118.3705, 'duration': 49.281},
            {'name': '', 'start': 167.6515, 'duration': 37.7294},
        ]
        assert graph[-1] == expected_last_line

    @pytest.mark.level0
    @pytest.mark.env_single
    @pytest.mark.platform_x86_cpu
    @pytest.mark.platform_arm_ascend_training
    @pytest.mark.platform_x86_gpu_training
    @pytest.mark.platform_x86_ascend_training
    def test_graph_api_error(self):
        """Test that a negative step id is rejected."""
        condition = {'filter_condition': {'step_id': -1}}
        with self.assertRaisesRegex(StepNumNotSupportedException, 'The step num must be in'):
            self.step_trace_analyser.query(condition)

    @pytest.mark.level0
    @pytest.mark.env_single
    @pytest.mark.platform_x86_cpu
    @pytest.mark.platform_arm_ascend_training
    @pytest.mark.platform_x86_gpu_training
    @pytest.mark.platform_x86_ascend_training
    def test_target_info_api(self):
        """Test the per-process info returned in proc mode."""
        filter_condition = {'mode': 'proc', 'step_id': None}
        condition = {'filter_condition': filter_condition}
        analyser = AnalyserFactory.instance().get_analyser('step_trace', self.profiler, '1')
        for proc_name in ('iteration_interval', 'fp_and_bp', 'tail'):
            filter_condition['proc_name'] = proc_name
            res = analyser.query(condition)
            assert res['size'] == 322
            assert len(res['info'][proc_name]) == res['size']

    @pytest.mark.level0
    @pytest.mark.env_single
    @pytest.mark.platform_x86_cpu
    @pytest.mark.platform_arm_ascend_training
    @pytest.mark.platform_x86_gpu_training
    @pytest.mark.platform_x86_ascend_training
    def test_summary_for_step_trace(self):
        """Test summary for step trace."""
        analyser = AnalyserFactory.instance().get_analyser('step_trace', self.profiler, '1')
        expected_summary = {
            'total_time': 205.3809,
            'iteration_interval': '0.1%',
            'fp_and_bp': '57.48%',
            'tail': '42.42%',
            'total_steps': 322,
        }
        summary = analyser.summary
        assert summary == expected_summary

    @pytest.mark.level0
    @pytest.mark.env_single
    @pytest.mark.platform_x86_cpu
    @pytest.mark.platform_arm_ascend_training
    @pytest.mark.platform_x86_gpu_training
    @pytest.mark.platform_x86_ascend_training
    def test_target_info_api_error(self):
        """Test that an unknown proc_name is rejected."""
        condition = {'filter_condition': {'proc_name': 'fake name'}}
        with self.assertRaisesRegex(ProfilerParamValueErrorException, 'Param value error'):
            self.step_trace_analyser.query(condition)
| @@ -140,8 +140,6 @@ class LogOperations: | |||||
| Returns: | Returns: | ||||
| str, Summary log path. | str, Summary log path. | ||||
| """ | """ | ||||
| if log_settings is None: | if log_settings is None: | ||||
| log_settings = dict() | log_settings = dict() | ||||
| @@ -1,21 +1,22 @@ | |||||
| step_num,start_point,end_point,total,fp_point,bp_point,iteration_interval,fp_and_bp,tail,stream_10_parallel_0_start_point,stream_10_parallel_0_end_point,stream_10_parallel_0,stream_10_parallel_1_start_point,stream_10_parallel_1_end_point,stream_10_parallel_1,stream_10_parallel_2_start_point,stream_10_parallel_2_end_point,stream_10_parallel_2,stream_11_parallel_0_start_point,stream_11_parallel_0_end_point,stream_11_parallel_0 | step_num,start_point,end_point,total,fp_point,bp_point,iteration_interval,fp_and_bp,tail,stream_10_parallel_0_start_point,stream_10_parallel_0_end_point,stream_10_parallel_0,stream_10_parallel_1_start_point,stream_10_parallel_1_end_point,stream_10_parallel_1,stream_10_parallel_2_start_point,stream_10_parallel_2_end_point,stream_10_parallel_2,stream_11_parallel_0_start_point,stream_11_parallel_0_end_point,stream_11_parallel_0 | ||||
| 1,45020526465,45041052609,20526144,45020538467,45032339821,12002,11801354,8712788,45020548731,45020574730,25999,45021556176,45021851790,295614,45032850758,45033055282,204524,45020553834,45020586153,32319 | |||||
| 2,45041052609,45061577413,20524804,45041066169,45052867211,13560,11801042,8710202,45041077109,45041096998,19889,45042082149,45042383040,300891,45053384374,45053578836,194462,45041083492,45041111560,28068 | |||||
| 3,45061577413,45082110285,20532872,45061590044,45073390989,12631,11800945,8719296,45061602230,45061625204,22974,45062605143,45062902171,297028,45073902745,45074101879,199134,45061603383,45061630503,27120 | |||||
| 4,45082110285,45102643335,20533050,45082123831,45093925011,13546,11801180,8718324,45082137141,45082154700,17559,45083140904,45083440914,300010,45094442436,45094636568,194132,45082140632,45082172845,32213 | |||||
| 5,45102643335,45123173197,20529862,45102656269,45114458162,12934,11801893,8715035,45102673781,45102695619,21838,45103675921,45103971688,295767,45114969344,45115173035,203691,45102675482,45102700807,25325 | |||||
| 6,45123173197,45143698550,20525353,45123186047,45134986923,12850,11800876,8711627,45123206016,45123217888,11872,45124199293,45124505427,306134,45135499961,45135703946,203985,45123196109,45123230354,34245 | |||||
| 7,45143698550,45164229070,20530520,45143712101,45155513651,13551,11801550,8715419,45143730950,45143750926,19976,45144731691,45145023183,291492,45156025144,45156224720,199576,45143728385,45143758571,30186 | |||||
| 8,45164229070,45184754878,20525808,45164241588,45176043051,12518,11801463,8711827,45164257233,45164281239,24006,45165258707,45165559029,300322,45176561050,45176755753,194703,45164251768,45164285836,34068 | |||||
| 9,45184754878,45205281328,20526450,45184767651,45196569566,12773,11801915,8711762,45184782578,45184799434,16856,45185780544,45186083913,303369,45197086694,45197282669,195975,45184787508,45184816578,29070 | |||||
| 10,45205281328,45225808076,20526748,45205295181,45217096699,13853,11801518,8711377,45205312140,45205328049,15909,45206305285,45206614349,309064,45217611452,45217807480,196028,45205312068,45205339808,27740 | |||||
| 11,45225808076,45246336247,20528171,45225820613,45237622360,12537,11801747,8713887,45225830974,45225860234,29260,45226834268,45227135750,301482,45238137612,45238341621,204009,45225833955,45225870064,36109 | |||||
| 12,45246336247,45266862873,20526626,45246348376,45258149958,12129,11801582,8712915,45246362139,45246380039,17900,45247364231,45247660677,296446,45258666251,45258864967,198716,45246361528,45246394861,33333 | |||||
| 13,45266862873,45287394213,20531340,45266875977,45278677761,13104,11801784,8716452,45266894199,45266909080,14881,45267890915,45268191344,300429,45279191183,45279391317,200134,45266888701,45266917000,28299 | |||||
| 14,45287394213,45307923664,20529451,45287407755,45299209636,13542,11801881,8714028,45287427610,45287441066,13456,45288421386,45288719901,298515,45299722571,45299927585,205014,45287425310,45287449279,23969 | |||||
| 15,45307923664,45328450904,20527240,45307935831,45319737272,12167,11801441,8713632,45307950324,45307974692,24368,45308952601,45309249485,296884,45320250207,45320450146,199939,45307946748,45307977430,30682 | |||||
| 16,45328450904,45348980394,20529490,45328463014,45340264339,12110,11801325,8716055,45328476038,45328500997,24959,45329481974,45329782090,300116,45340777279,45340982640,205361,45328474691,45328507939,33248 | |||||
| 17,45348980394,45369507222,20526828,45348994014,45360794759,13620,11800745,8712463,45349011746,45349029629,17883,45350010381,45350305839,295458,45361314636,45361505942,191306,45349006330,45349041259,34929 | |||||
| 18,45369507222,45390033964,20526742,45369520766,45381322000,13544,11801234,8711964,45369533695,45369553091,19396,45370539131,45370831270,292139,45381839638,45382037802,198164,45369537705,45369568029,30324 | |||||
| 19,45390033964,45410562185,20528221,45390046293,45401847702,12329,11801409,8714483,45390059837,45390080765,20928,45391059730,45391357314,297584,45402361677,45402562220,200543,45390060524,45390092731,32207 | |||||
| -,45215545457,45236073767,20528310,45215558418,45227359836,12961,11801418,8713930,45215573652,45215593314,19662,45216574125,45216873188,299063,45227874681,45228073840,199160,45215573018,45215603636,30619 | |||||
| 1,45000030081,45004033128,4003047,45000030081,45001733025,0,1702944,2300103,45000042679,45000060275,17596,45001048152,45001346254,298102,45002247411,45002448354,200943,45000049687,45000075987,26300 | |||||
| 2,45004033128,45017085658,13052530,45013070937,45014785314,9037809,1714377,2300344,45013085379,45013105429,20050,45014087119,45014385136,298017,45015297166,45015504449,207283,45013084925,45013118334,33409 | |||||
| 3,45017085658,45030119392,13033734,45026116231,45027818443,9030573,1702212,2300949,45026131909,45026150554,18645,45027134392,45027430418,296026,45028337093,45028537767,200674,45026129217,45026160937,31720 | |||||
| 4,45030119392,45043158607,13039215,45039152348,45040856975,9032956,1704627,2301632,45039169890,45039188966,19076,45040169338,45040466770,297432,45041374122,45041567754,193632,45039171681,45039193865,22184 | |||||
| 5,45043158607,45056198128,13039521,45052190932,45053898028,9032325,1707096,2300100,45052207675,45052222642,14967,45053204442,45053505540,301098,45054413207,45054616536,203329,45052201931,45052237599,35668 | |||||
| 6,45056198128,45069239564,13041436,45065233106,45066939463,9034978,1706357,2300101,45065245482,45065272534,27052,45066248423,45066546419,297996,45067455113,45067659145,204032,45065245817,45065279896,34079 | |||||
| 7,45069239564,45082281383,13041819,45078274997,45079980193,9035433,1705196,2301190,45078293910,45078312935,19025,45079287754,45079593841,306087,45080492957,45080691395,198438,45078292067,45078322277,30210 | |||||
| 8,45082281383,45095336378,13054995,45091321488,45093036084,9040105,1714596,2300294,45091338628,45091359138,20510,45092338469,45092638994,300525,45093554195,45093747470,193275,45091341356,45091369667,28311 | |||||
| 9,45095336378,45108372225,13035847,45104363079,45106071009,9026701,1707930,2301216,45104374524,45104400088,25564,45105378751,45105683029,304278,45106587481,45106785336,197855,45104382131,45104410852,28721 | |||||
| 10,45108372225,45121412413,13040188,45117401873,45119111301,9029648,1709428,2301112,45117417721,45117439668,21947,45118413083,45118718050,304967,45119629347,45119829996,200649,45117421502,45117446718,25216 | |||||
| 11,45121412413,45134477662,13065249,45130459598,45132175723,9047185,1716125,2301939,45130478168,45130498936,20768,45131477957,45131775220,297263,45132691645,45132893707,202062,45130470285,45130501652,31367 | |||||
| 12,45134477662,45147533298,13055636,45143521860,45145232553,9044198,1710693,2300745,45143533787,45143557293,23506,45144533554,45144841545,307991,45145744997,45145952255,207258,45143537383,45143563466,26083 | |||||
| 13,45147533298,45160588134,13054836,45156570201,45158286694,9036903,1716493,2301440,45156581069,45156609506,28437,45157581617,45157880841,299224,45158806166,45158999875,193709,45156589050,45156615664,26614 | |||||
| 14,45160588134,45173640064,13051930,45169625906,45171339426,9037772,1713520,2300638,45169637432,45169661754,24322,45170639482,45170940949,301467,45171853721,45172056606,202885,45169644605,45169673410,28805 | |||||
| 15,45173640064,45186671634,13031570,45182666696,45184371430,9026632,1704734,2300204,45182678355,45182698471,20116,45183679568,45183981082,301514,45184887156,45185083035,195879,45182680062,45182708455,28393 | |||||
| 16,45186671634,45199720448,13048814,45195714716,45197420410,9043082,1705694,2300038,45195728993,45195754646,25653,45196732493,45197028048,295555,45197934921,45198139237,204316,45195733069,45195764102,31033 | |||||
| 17,45199720448,45212762605,13042157,45208758416,45210460864,9037968,1702448,2301741,45208771010,45208790367,19357,45209773548,45210074988,301440,45210978277,45211173577,195300,45208773143,45208803280,30137 | |||||
| 18,45212762605,45225814601,13051996,45221801814,45223514580,9039209,1712766,2300021,45221815911,45221839644,23733,45222819211,45223114544,295333,45224031469,45224234043,202574,45221812106,45221849103,36997 | |||||
| 19,45225814601,45238848430,13033829,45234842015,45236548356,9027414,1706341,2300074,45234855444,45234876469,21025,45235853358,45236160825,307467,45237063061,45237260964,197903,45234857141,45234882976,25835 | |||||
| 20,45238848430,45251899738,13051308,45247879385,45249598280,9030955,1718895,2301458,45247896725,45247917316,20591,45248896361,45249193681,297320,45250117916,45250315651,197735,45247894228,45247926723,32495 | |||||
| -,45121436513,45134482124,13045611,45130471874,45132181322,9035360,1709449,2300802,45130486422,45130508229,21808,45131486785,45131787364,300579,45132697369,45132897305,199936,45130487458,45130517315,29857 | |||||
| @@ -0,0 +1,42 @@ | |||||
| step_num,start_point,end_point,total,fp_point,bp_point,iteration_interval,fp_and_bp,tail,stream_10_parallel_0_start_point,stream_10_parallel_0_end_point,stream_10_parallel_0,stream_10_parallel_1_start_point,stream_10_parallel_1_end_point,stream_10_parallel_1,stream_10_parallel_2_start_point,stream_10_parallel_2_end_point,stream_10_parallel_2,stream_11_parallel_0_start_point,stream_11_parallel_0_end_point,stream_11_parallel_0 | |||||
| 1,45000025226,45004034753,4009527,45000025226,45001734362,0,1709136,2300391,45000044023,45000060886,16863,45001043581,45001343373,299792,45002254048,45002452830,198782,45000043807,45000065736,21929 | |||||
| 2,45004034753,45017091420,13056667,45013073790,45014789509,9039037,1715719,2301911,45013085205,45013104210,19005,45014086339,45014393261,306922,45015299546,45015501808,202262,45013085040,45013119810,34770 | |||||
| 3,45017091420,45030144372,13052952,45026123867,45027843651,9032447,1719784,2300721,45026138546,45026154524,15978,45027135742,45027437486,301744,45028363120,45028560901,197781,45026136046,45026171363,35317 | |||||
| 4,45030144372,45043184486,13040114,45039173149,45040883087,9028777,1709938,2301399,45039190927,45039209948,19021,45040185915,45040484897,298982,45041399754,45041594775,195021,45039192768,45039221423,28655 | |||||
| 5,45043184486,45056241064,13056578,45052223555,45053940709,9039069,1717154,2300355,45052241736,45052262186,20450,45053239605,45053540866,301261,45054452604,45054654505,201901,45052233932,45052265774,31842 | |||||
| 6,45056241064,45069291346,13050282,45065278144,45066991121,9037080,1712977,2300225,45065293660,45065316136,22476,45066289480,45066589910,300430,45067511002,45067701731,190729,45065293679,45065321296,27617 | |||||
| 7,45069291346,45082344927,13053581,45078335376,45080043268,9044030,1707892,2301659,45078353164,45078365382,12218,45079354748,45079648384,293636,45080557453,45080760374,202921,45078353030,45078384530,31500 | |||||
| 8,45082344927,45095382554,13037627,45091368697,45093080797,9023770,1712100,2301757,45091381244,45091405208,23964,45092382630,45092684285,301655,45093590961,45093796698,205737,45091381199,45091413840,32641 | |||||
| 9,45095382554,45108433947,13051393,45104419947,45106132133,9037393,1712186,2301814,45104432587,45104457476,24889,45105431458,45105735476,304018,45106651213,45106845305,194092,45104435207,45104466677,31470 | |||||
| 10,45108433947,45121486591,13052644,45117469353,45119185969,9035406,1716616,2300622,45117483627,45117504869,21242,45118483411,45118788540,305129,45119696660,45119898575,201915,45117485587,45117510985,25398 | |||||
| 11,45121486591,45134546571,13059980,45130528618,45132244809,9042027,1716191,2301762,45130539730,45130561122,21392,45131538695,45131846715,308020,45132759789,45132960848,201059,45130545378,45130569412,24034 | |||||
| 12,45134546571,45147608222,13061651,45143597023,45145307273,9050452,1710250,2300949,45143615771,45143631460,15689,45144610592,45144910736,300144,45145818642,45146024326,205684,45143613528,45143640223,26695 | |||||
| 13,45147608222,45160663790,13055568,45156648696,45158362923,9040474,1714227,2300867,45156663193,45156685466,22273,45157661576,45157963074,301498,45158881212,45159074431,193219,45156667038,45156694912,27874 | |||||
| 14,45160663790,45173707626,13043836,45169694535,45171407246,9030745,1712711,2300380,45169710667,45169727936,17269,45170705802,45171013806,308004,45171924100,45172120273,196173,45169708524,45169739038,30514 | |||||
| 15,45173707626,45186754860,13047234,45182750254,45184454036,9042628,1703782,2300824,45182765445,45182789799,24354,45183761335,45184065169,303834,45184973312,45185170444,197132,45182769451,45182799598,30147 | |||||
| 16,45186754860,45199798718,13043858,45195792271,45197497908,9037411,1705637,2300810,45195804771,45195827915,23144,45196804016,45197108243,304227,45198013357,45198209858,196501,45195806656,45195841674,35018 | |||||
| 17,45199798718,45212854993,13056275,45208834355,45210553378,9035637,1719023,2301615,45208850179,45208865588,15409,45209851018,45210151436,300418,45211073169,45211271792,198623,45208847052,45208876998,29946 | |||||
| 18,45212854993,45225893712,13038719,45221888939,45223593704,9033946,1704765,2300008,45221901732,45221924983,23251,45222908795,45223203590,294795,45224105803,45224313354,207551,45221899792,45221938802,39010 | |||||
| 19,45225893712,45238941242,13047530,45234926295,45236640454,9032583,1714159,2300788,45234938628,45234957237,18609,45235942710,45236239983,297273,45237159532,45237356140,196608,45234938330,45234976170,37840 | |||||
| 20,45238941242,45251979177,13037935,45247977674,45249678116,9036432,1700442,2301061,45247990919,45248013476,22557,45248991451,45249294742,303291,45250195733,45250395760,200027,45247988950,45248024969,36019 | |||||
| 21,45251979177,45265018752,13039575,45261005416,45262718472,9026239,1713056,2300280,0,0,0,0,0,0,0,0,0,0,0,0 | |||||
| 22,45265018752,45278062782,13044030,45274047185,45275762095,9028433,1714910,2300687,0,0,0,0,0,0,0,0,0,0,0,0 | |||||
| 23,45278062782,45291105708,13042926,45287094000,45288805223,9031218,1711223,2300485,0,0,0,0,0,0,0,0,0,0,0,0 | |||||
| 24,45291105708,45304155918,13050210,45300150844,45301854040,9045136,1703196,2301878,0,0,0,0,0,0,0,0,0,0,0,0 | |||||
| 25,45304155918,45317206695,13050777,45313191948,45314905714,9036030,1713766,2300981,0,0,0,0,0,0,0,0,0,0,0,0 | |||||
| 26,45317206695,45330265105,13058410,45326256021,45327964581,9049326,1708560,2300524,0,0,0,0,0,0,0,0,0,0,0,0 | |||||
| 27,45330265105,45343324012,13058907,45339305124,45341023739,9040019,1718615,2300273,0,0,0,0,0,0,0,0,0,0,0,0 | |||||
| 28,45343324012,45356374571,13050559,45352366211,45354073401,9042199,1707190,2301170,0,0,0,0,0,0,0,0,0,0,0,0 | |||||
| 29,45356374571,45369429514,13054943,45365417827,45367128283,9043256,1710456,2301231,0,0,0,0,0,0,0,0,0,0,0,0 | |||||
| 30,45369429514,45382479199,13049685,45378476397,45380177297,9046883,1700900,2301902,0,0,0,0,0,0,0,0,0,0,0,0 | |||||
| 31,45382479199,45395530376,13051177,45391510137,45393229377,9030938,1719240,2300999,0,0,0,0,0,0,0,0,0,0,0,0 | |||||
| 32,45395530376,45408571765,13041389,45404559082,45406270720,9028706,1711638,2301045,0,0,0,0,0,0,0,0,0,0,0,0 | |||||
| 33,45408571765,45421635175,13063410,45417619223,45419334221,9047458,1714998,2300954,0,0,0,0,0,0,0,0,0,0,0,0 | |||||
| 34,45421635175,45434672219,13037044,45430669445,45432371312,9034270,1701867,2300907,0,0,0,0,0,0,0,0,0,0,0,0 | |||||
| 35,45434672219,45447714036,13041817,45443704548,45445413852,9032329,1709304,2300184,0,0,0,0,0,0,0,0,0,0,0,0 | |||||
| 36,45447714036,45460765153,13051117,45456753675,45458463701,9039639,1710026,2301452,0,0,0,0,0,0,0,0,0,0,0,0 | |||||
| 37,45460765153,45473829105,13063952,45469808281,45471527400,9043128,1719119,2301705,0,0,0,0,0,0,0,0,0,0,0,0 | |||||
| 38,45473829105,45486884190,13055085,45482867237,45484583534,9038132,1716297,2300656,0,0,0,0,0,0,0,0,0,0,0,0 | |||||
| 39,45486884190,45499928571,13044381,45495917628,45497627921,9033438,1710293,2300650,0,0,0,0,0,0,0,0,0,0,0,0 | |||||
| 40,45499928571,45512973815,13045244,45508968990,45510673699,9040419,1704709,2300116,0,0,0,0,0,0,0,0,0,0,0,0 | |||||
| -,45251983006,45265032725,13049720,45261020353,45262731761,9037347,1711408,2300964,21986676455,21986686280,9825,21987163213,21987310272,147058,21987754537,21987851587,97050,21986676441,21986691731,15290 | |||||