From a525f61f680ab4473603cf3af16fbb5e86983b09 Mon Sep 17 00:00:00 2001 From: gzhcv Date: Tue, 22 Sep 2020 17:01:54 +0800 Subject: [PATCH] add st of profiler timeline --- .../profiler/analyser/timeline_analyser.py | 281 +----------------- .../func/profiler/test_timeline_analyser.py | 118 ++++++++ .../profiler/ascend_timeline_display_0.json | 1 + .../profiler/ascend_timeline_summary_0.json | 1 + .../profiler/gpu_timeline_display_0.json | 1 + .../profiler/gpu_timeline_summary_0.json | 1 + 6 files changed, 123 insertions(+), 280 deletions(-) create mode 100644 tests/st/func/profiler/test_timeline_analyser.py create mode 100644 tests/utils/resource/profiler/ascend_timeline_display_0.json create mode 100644 tests/utils/resource/profiler/ascend_timeline_summary_0.json create mode 100644 tests/utils/resource/profiler/gpu_timeline_display_0.json create mode 100644 tests/utils/resource/profiler/gpu_timeline_summary_0.json diff --git a/mindinsight/profiler/analyser/timeline_analyser.py b/mindinsight/profiler/analyser/timeline_analyser.py index 5af13797..80fecedb 100644 --- a/mindinsight/profiler/analyser/timeline_analyser.py +++ b/mindinsight/profiler/analyser/timeline_analyser.py @@ -17,75 +17,20 @@ import json import os from mindinsight.profiler.analyser.base_analyser import BaseAnalyser -from mindinsight.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException, \ - ProfilerIOException +from mindinsight.profiler.common.exceptions.exceptions import ProfilerIOException from mindinsight.profiler.common.log import logger from mindinsight.profiler.common.validator.validate_path import validate_and_normalize_path from mindinsight.utils.exceptions import ParamValueError -SIZE_LIMIT = 20 * 1024 * 1024 # 20MB - - -class TimelineContainer: - """ - A container of operator computation metadata. - - Args: - split_list (list): The split list of metadata in op_compute output file. 
- """ - def __init__(self, split_list): - self._op_name = split_list[0] - self._stream_id = int(split_list[1]) - self._start_time = float(split_list[2]) - self._duration = float(split_list[3]) - self._pid = None - if len(split_list) == 5: - self._pid = int(split_list[4]) - - @property - def op_name(self): - """Get the name of the operator.""" - return self._op_name - - @property - def stream_id(self): - """Get the stream id of the operator.""" - return self._stream_id - - @property - def start_time(self): - """Get the execution start time of the operator.""" - return self._start_time - - @property - def duration(self): - """Get the duration of the operator execution.""" - return self._duration - - @property - def pid(self): - """Get the pid of the operator execution.""" - return self._pid - class TimelineAnalyser(BaseAnalyser): """ Analyse timeline data from file. """ - __col_names__ = ['op_name', 'stream_id', 'start_time', 'duration'] - _output_timeline_data_file_path = 'output_timeline_data_{}.txt' - _min_cycle_counter_file_path = 'min_cycle_counter_{}.txt' _ascend_display_filename = 'ascend_timeline_display_{}.json' _gpu_display_filename = 'gpu_timeline_display_{}.json' _ascend_timeline_summary_filename = 'ascend_timeline_summary_{}.json' _gpu_timeline_summary_filename = 'gpu_timeline_summary_{}.json' - _timeline_meta = [] - _timeline_summary = { - 'total_time': 0, - 'num_of_streams': 0, - 'num_of_ops': 0, - 'op_exe_times': 0 - } def _load(self): """Load data according to the parsed profiling files.""" @@ -161,227 +106,3 @@ class TimelineAnalyser(BaseAnalyser): logger.info('No timeline summary file. Please check the output path.') return timeline_summary - - def write_timeline(self): - """Load data according to the parsed profiling files.""" - # Write timeline to file. 
- logger.info('Writing timeline file...') - self.write_timeline_to_json_by_limitation() - logger.info('Finished file writing!') - - def write_timeline_to_json_by_limitation(self): - """Write timeline to json by limitation.""" - display_filename = self._display_filename.format(self._device_id) - display_file_path = os.path.join( - self._profiling_dir, - display_filename - ) - display_file_path = validate_and_normalize_path( - display_file_path, raise_key='Invalid timeline display json path.' - ) - - length = len(self._timeline_meta) - try: - with open(display_file_path, 'w') as json_file: - json_file.write('[') - for index, item in enumerate(self._timeline_meta): - json.dump(item, json_file) - file_size = os.path.getsize(display_file_path) - if file_size > SIZE_LIMIT: - break - if index == length - 1: - break - json_file.write(',') - json_file.write(']') - except (IOError, OSError) as err: - logger.error('Error occurred when write timeline display file: %s', err) - raise ProfilerIOException - - def write_timeline_summary(self): - """Write timeline summary to json.""" - timeline_summary_file_path = os.path.join( - self._profiling_dir, - self._timeline_summary_filename.format(self._device_id) - ) - - timeline_summary_file_path = validate_and_normalize_path( - timeline_summary_file_path, raise_key='Invalid timeline summary path.' - ) - - try: - with open(timeline_summary_file_path, 'w') as json_file: - json.dump(self._timeline_summary, json_file) - except (IOError, OSError) as err: - logger.error('Error occurred when write timeline summary file: %s', err) - raise ProfilerIOException - - def _load_timeline_data(self): - """Load timeline data from file.""" - file_path = os.path.join( - self._profiling_dir, - self._output_timeline_data_file_path.format(self._device_id) - ) - file_path = validate_and_normalize_path( - file_path, raise_key='Invalid timeline txt file path.' 
- ) - if not os.path.exists(file_path): - logger.error("Failed to find parsed timeline file.") - raise ProfilerFileNotFoundException('parsed timeline file') - - timeline_list = [] - try: - with open(file_path, 'r') as f_obj: - for line in f_obj: - if not line.startswith('op_name'): - line_list = line.strip('\n').split(',') - timeline_list.append(line_list) - except (IOError, OSError) as err: - logger.error('Error occurred when read timeline intermediate file: %s', err) - raise ProfilerIOException - - return timeline_list - - def _parse_timeline_data(self, timeline, min_cycle_counter): - """Parse timeline data.""" - # factor to convert the time unit from 1ms to 1us for timeline display - factor = 1000 - op_meta = TimelineContainer(timeline) - timeline_dict = {} - timeline_dict['name'] = op_meta.op_name - timeline_dict['ph'] = 'X' - timeline_dict['tid'] = op_meta.stream_id - timeline_dict['ts'] = (op_meta.start_time - min_cycle_counter) * factor - dur = op_meta.duration * factor - timeline_dict['dur'] = dur - if op_meta.pid is None: - timeline_dict['pid'] = int(self._device_id) - # Update total time of operator execution. - self._timeline_summary['total_time'] += dur - else: # AllReduce and AI CPU pid - timeline_dict['pid'] = op_meta.pid - self._timeline_meta.append(timeline_dict) - - @staticmethod - def _update_num_of_streams(timeline, stream_count_dict): - """Update number of streams.""" - stream_id = timeline[1] - if stream_id not in stream_count_dict.keys(): - stream_count_dict[stream_id] = 1 - else: - stream_count_dict[stream_id] += 1 - - def get_min_cycle_counter(self): - """ - Get minimum cycle counter. - - Returns: - float, the minimum value of the cycle counter. - """ - file_path = os.path.join( - self._profiling_dir, - self._min_cycle_counter_file_path.format(self._device_id) - ) - - file_path = validate_and_normalize_path( - file_path, raise_key='Invalid min cycle counter file path.' 
- ) - - if os.path.exists(file_path): - try: - with open(file_path, 'r') as f_obj: - min_cycle_counter = f_obj.read() - min_cycle_counter = float(min_cycle_counter) \ - if not min_cycle_counter == 'inf' else 0 - except (IOError, OSError) as err: - logger.error('Error occurred when read minimum cycle counter: %s', err) - raise ProfilerIOException - else: - min_cycle_counter = 0 - logger.info("No min cycle counter recorded.") - - return min_cycle_counter - - def init_timeline(self, all_reduce_info, framework_info, aicpu_info, min_cycle_counter): - """ - Init timeline metadata, adding all collected info. - - Args: - all_reduce_info (list[list]): The metadata of AllReduce operator. - framework_info (dict): The framework metadata. - aicpu_info (dict): The metadata of AI CPU operator. - min_cycle_counter (float): The minimum cycle counter of the timeline. - """ - if min_cycle_counter == float('inf'): - min_cycle_counter = 0 - - logger.info('Initiating timeline...') - timeline_list = self._load_timeline_data() - self._timeline_summary['op_exe_times'] = len(timeline_list) - - # Add AllReduce info to timeline temp list and sort by start time. - if all_reduce_info: - logger.debug('AllReduce info found. Start adding info into timeline...') - timeline_list.extend(all_reduce_info) - timeline_list.sort(key=lambda x: float(x[2])) - - # Add AI CPU data into timeline temp list and sort by start time. - aicpu_data = aicpu_info.get('info') - if aicpu_data: - timeline_list.extend(aicpu_data) - timeline_list.sort(key=lambda x: float(x[2])) - self._timeline_summary['op_exe_times'] += aicpu_info.get('op_exe_times', 0) - self._timeline_summary['num_of_streams'] += aicpu_info.get('num_of_streams', 0) - self._timeline_summary['num_of_ops'] += aicpu_info.get('num_of_ops', 0) - self._timeline_summary['total_time'] += aicpu_info.get('total_time', 0) - - # Init a dict for counting the num of streams. 
- stream_count_dict = {} - for timeline in timeline_list: - self._parse_timeline_data(timeline, min_cycle_counter) - # Updating the collection of streams. - if len(timeline) == 4: - self._update_num_of_streams(timeline, stream_count_dict) - - # Get framework metadata. - framework_obj_list = framework_info.get('object') - # The length of list is the number of operators. - self._timeline_summary['num_of_ops'] += len(framework_obj_list) - self._add_framework_info(framework_obj_list) - logger.info('Finished adding info into timeline...') - - # Update timeline summary info - self._timeline_summary['num_of_streams'] += len(stream_count_dict.keys()) - - def _add_framework_info(self, framework_obj_list): - """ - Add framework info into timeline metadata. - - Args: - framework_obj_list (list): The framework metadata. - """ - logger.debug('Start adding framework info into timeline...') - # Get the framework info that will be written into timeline. - framework_info_dict = {} - for framework_obj in framework_obj_list: - op_name = framework_obj[0] - op_type = framework_obj[1] - op_full_name = framework_obj[4] - op_info = framework_obj[5] - framework_info_dict[op_full_name] = { - 'name': op_name, - 'args': { - 'type': op_type, - 'fullname': op_full_name - } - } - framework_info_dict[op_full_name]['args'].update(op_info) - - # Insert framework info into timeline. 
for timeline_item in self._timeline_meta: - op_full_name = timeline_item.get('name') - framework_item = framework_info_dict.get(op_full_name) - if framework_item: - timeline_item['name'] = framework_item.get('name') - timeline_item['args'] = framework_item.get('args') - - logger.debug('Finished adding framework info into timeline...') diff --git a/tests/st/func/profiler/test_timeline_analyser.py b/tests/st/func/profiler/test_timeline_analyser.py new file mode 100644 index 00000000..1908861e --- /dev/null +++ b/tests/st/func/profiler/test_timeline_analyser.py @@ -0,0 +1,118 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +Function: + Test profiler to watch the performance of training. +Usage: + pytest tests/st/func/profiler +""" +import os + +import pytest + +from mindinsight.profiler.analyser.analyser_factory import AnalyserFactory +from . 
import PROFILER_DIR +from ....utils.tools import compare_result_with_file + +class TestTimelineAnalyser: + """Test timeline analyser module.""" + + @classmethod + def setup_class(cls): + """Generate parsed files.""" + cls.profiler = PROFILER_DIR + cls.device_id = 0 + cls.ascend_display_filename = 'ascend_timeline_display_{}.json' + cls.gpu_display_filename = 'gpu_timeline_display_{}.json' + cls.ascend_timeline_summary_filename = 'ascend_timeline_summary_{}.json' + cls.gpu_timeline_summary_filename = 'gpu_timeline_summary_{}.json' + + def setup_method(self): + """Create analyser.""" + self._analyser = AnalyserFactory.instance().get_analyser( + 'timeline', self.profiler, self.device_id) + + + @pytest.mark.level0 + @pytest.mark.env_single + @pytest.mark.platform_x86_cpu + @pytest.mark.platform_arm_ascend_training + @pytest.mark.platform_x86_gpu_training + @pytest.mark.platform_x86_ascend_training + def test_get_display_timeline(self): + """Test the function of get timeline data for UI display.""" + gpu_file_path = os.path.join( + self.profiler, + self.gpu_display_filename.format(self.device_id) + ) + ascend_file_path = os.path.join( + self.profiler, + self.ascend_display_filename.format(self.device_id) + ) + + result = self._analyser.get_display_timeline("gpu") + compare_result_with_file(result, gpu_file_path) + + result = self._analyser.get_display_timeline("ascend") + compare_result_with_file(result, ascend_file_path) + + @pytest.mark.level0 + @pytest.mark.env_single + @pytest.mark.platform_x86_cpu + @pytest.mark.platform_arm_ascend_training + @pytest.mark.platform_x86_gpu_training + @pytest.mark.platform_x86_ascend_training + def test_get_timeline_summary(self): + """Test the function of get timeline summary.""" + gpu_file_path = os.path.join( + self.profiler, + self.gpu_timeline_summary_filename.format(self.device_id) + ) + ascend_file_path = os.path.join( + self.profiler, + self.ascend_timeline_summary_filename.format(self.device_id) + ) + + result = 
self._analyser.get_timeline_summary("gpu") + compare_result_with_file(result, gpu_file_path) + + result = self._analyser.get_timeline_summary("ascend") + compare_result_with_file(result, ascend_file_path) + + @pytest.mark.level0 + @pytest.mark.env_single + @pytest.mark.platform_x86_cpu + @pytest.mark.platform_arm_ascend_training + @pytest.mark.platform_x86_gpu_training + @pytest.mark.platform_x86_ascend_training + def test_timeline_summary_file_not_exist(self): + """Test the function of get timeline data for UI display.""" + device_id = 1 + analyser = AnalyserFactory.instance().get_analyser( + 'timeline', self.profiler, device_id) + analyser.get_timeline_summary("gpu") + + @pytest.mark.level0 + @pytest.mark.env_single + @pytest.mark.platform_x86_cpu + @pytest.mark.platform_arm_ascend_training + @pytest.mark.platform_x86_gpu_training + @pytest.mark.platform_x86_ascend_training + def test_timeline_display_file_not_exist(self): + """Test the function of get timeline data for UI display.""" + device_id = 1 + analyser = AnalyserFactory.instance().get_analyser( + 'timeline', self.profiler, device_id) + analyser.get_display_timeline("gpu") diff --git a/tests/utils/resource/profiler/ascend_timeline_display_0.json b/tests/utils/resource/profiler/ascend_timeline_display_0.json new file mode 100644 index 00000000..0041f61c --- /dev/null +++ b/tests/utils/resource/profiler/ascend_timeline_display_0.json @@ -0,0 +1 @@ +[{"name": "InitData", "ph": "X", "tid": "0", "ts": 0.0, "dur": 159.9999964237213, "pid": 9000},{"name": "GetNext", "ph": "X", "tid": "0", "ts": 6042138.9999985695, "dur": 517.9999768733978, "pid": 9000},{"name": "AssignAdd-op142", "ph": "X", "tid": "519", "ts": 6042909.559994936, "dur": 3.1, "pid": 1, "args": {"type": "AssignAdd", "fullname": "Default/AssignAdd-op142", "input_0": {"format": "DefaultFormat", "data_type": "NUMBER_TYPE_UINT8", "shape": "1"}, "input_1": {"format": "DefaultFormat", "data_type": "NUMBER_TYPE_UINT8", "shape": "1"}, "output_0": 
{"format": "DefaultFormat", "data_type": "NUMBER_TYPE_UINT8", "shape": "1"}}}] diff --git a/tests/utils/resource/profiler/ascend_timeline_summary_0.json b/tests/utils/resource/profiler/ascend_timeline_summary_0.json new file mode 100644 index 00000000..256734c2 --- /dev/null +++ b/tests/utils/resource/profiler/ascend_timeline_summary_0.json @@ -0,0 +1 @@ +{"total_time": 23147.013999999992, "num_of_streams": 2, "num_of_ops": 213, "op_exe_times": 112} \ No newline at end of file diff --git a/tests/utils/resource/profiler/gpu_timeline_display_0.json b/tests/utils/resource/profiler/gpu_timeline_display_0.json new file mode 100644 index 00000000..2236a100 --- /dev/null +++ b/tests/utils/resource/profiler/gpu_timeline_display_0.json @@ -0,0 +1 @@ +[{"name": "Default/InitDataSetQueue-op0", "ph": "X", "tid": "Ops", "ts": 1600695893269618.2, "dur": 303.84, "pid": 0},{"name": "void MomentumUpdateVariableKernel(unsigned long, float*, float*, float const*, float const*, float const*, bool)", "ph": "X", "tid": "17", "ts": 1600695897737724.8, "dur": 5.792, "pid": 0, "args": {"type": "cuLaunchKernel", "op_full_name": "Default/optimizer-Momentum/ApplyMomentum-op55", "block_dim": "1024,1,1", "grid_dim": "8,1,1"}}] diff --git a/tests/utils/resource/profiler/gpu_timeline_summary_0.json b/tests/utils/resource/profiler/gpu_timeline_summary_0.json new file mode 100644 index 00000000..ee975dbd --- /dev/null +++ b/tests/utils/resource/profiler/gpu_timeline_summary_0.json @@ -0,0 +1 @@ +{"total_time": 3875.638703999999, "num_of_streams": 1, "num_of_ops": 66, "op_exe_times": 66} \ No newline at end of file