Compare commits

...

19 Commits
master ... r0.7

Author SHA1 Message Date
  mindspore-ci-bot d89dcb8406 !756 update securec repository link for r0.7 5 years ago
  LYX e8f162e20c update securec repository link 5 years ago
  mindspore-ci-bot 7149053053 !585 update README files of r0.7 branch 5 years ago
  liangyongxiong fdb93aa60e update README files 5 years ago
  mindspore-ci-bot a9b99418e5 !580 store data with default datatype when call numpy.array in TensorContainer and remove limitaion of datatype 5 years ago
  wangshuide2020 4d43bd6c5a Store data with default datatype in TensorContainer and remove limitation of datatype. 5 years ago
  mindspore-ci-bot 1fd40dc0cf !579 1.remove redundant data to save memory and simplify the TensorContainer. 2.kill children processes of worker when itself has been killed by gunicorn master. 5 years ago
  wangshuide2020 ac47204573 1.remove redundant data to save memory and simplify the tensorcontainer. 5 years ago
  mindspore-ci-bot d4d952e0f7 !574 Fix README.md broken links 5 years ago
  mindspore-ci-bot 3dd658942c !577 fix profiling issues 5 years ago
  WeiFeng-mindinsight 63d12fa9e6 fix profiling issues I1SQM6 I1SQMI I1SL3D 5 years ago
  mindspore-ci-bot 1e0088bd64 !572 optimize mindwizard network templates for r0.7 5 years ago
  Li Hongzhang 6465f524fd fix README.md links 5 years ago
  liangyongxiong 8e17bc9754 optimize network templates 5 years ago
  mindspore-ci-bot 7891976d61 !569 UI change dashboard jumping mode and default auto fresh time 5 years ago
  ph 754dd37a0f fix issue I1RIVS:change dashboard jumping mode and modify auto fresh time to 10s as default 5 years ago
  mindspore-ci-bot c891007cae !565 If there is no data in the traceability module, click the disabled form check box to modify the check style 5 years ago
  mindspore-ci-bot ce5cb7f934 !566 The assistant of profiling module under GPU adds training log path display 5 years ago
  qin_jun_yan fe2b8abef5 The assistant of profiling module under GPU adds training log path display 5 years ago
42 changed files with 804 additions and 542 deletions
Split View
  1. +1
    -1
      .gitmodules
  2. +7
    -7
      README.md
  3. +6
    -6
      README_CN.md
  4. +45
    -0
      mindinsight/backend/config/gunicorn_conf.py
  5. +1
    -1
      mindinsight/backend/run.py
  6. +4
    -6
      mindinsight/datavisual/data_transform/ms_data_loader.py
  7. +34
    -38
      mindinsight/datavisual/data_transform/tensor_container.py
  8. +5
    -6
      mindinsight/datavisual/processors/tensor_processor.py
  9. +10
    -7
      mindinsight/ui/src/locales/en-us.json
  10. +14
    -11
      mindinsight/ui/src/locales/zh-cn.json
  11. +1
    -1
      mindinsight/ui/src/store.js
  12. +272
    -213
      mindinsight/ui/src/views/profiling-gpu/operator.vue
  13. +21
    -0
      mindinsight/ui/src/views/profiling-gpu/profiling.vue
  14. +5
    -1
      mindinsight/ui/src/views/profiling/data-process.vue
  15. +248
    -202
      mindinsight/ui/src/views/profiling/operator.vue
  16. +2
    -1
      mindinsight/ui/src/views/profiling/profiling.vue
  17. +1
    -3
      mindinsight/ui/src/views/train-manage/compare-plate.vue
  18. +118
    -13
      mindinsight/ui/src/views/train-manage/summary-manage.vue
  19. +4
    -4
      mindinsight/wizard/README_CN.md
  20. +1
    -0
      mindinsight/wizard/base/source_file.py
  21. +0
    -1
      mindinsight/wizard/conf/templates/network/alexnet/scripts/run_distribute_train.sh-tpl
  22. +0
    -1
      mindinsight/wizard/conf/templates/network/alexnet/scripts/run_distribute_train_gpu.sh-tpl
  23. +0
    -1
      mindinsight/wizard/conf/templates/network/alexnet/scripts/run_eval.sh-tpl
  24. +0
    -1
      mindinsight/wizard/conf/templates/network/alexnet/scripts/run_eval_gpu.sh-tpl
  25. +0
    -1
      mindinsight/wizard/conf/templates/network/alexnet/scripts/run_standalone_train.sh-tpl
  26. +0
    -1
      mindinsight/wizard/conf/templates/network/alexnet/scripts/run_standalone_train_gpu.sh-tpl
  27. +1
    -1
      mindinsight/wizard/conf/templates/network/alexnet/src/config.py-tpl
  28. +0
    -1
      mindinsight/wizard/conf/templates/network/lenet/scripts/run_distribute_train.sh-tpl
  29. +0
    -1
      mindinsight/wizard/conf/templates/network/lenet/scripts/run_distribute_train_gpu.sh-tpl
  30. +0
    -1
      mindinsight/wizard/conf/templates/network/lenet/scripts/run_eval.sh-tpl
  31. +0
    -1
      mindinsight/wizard/conf/templates/network/lenet/scripts/run_eval_gpu.sh-tpl
  32. +0
    -1
      mindinsight/wizard/conf/templates/network/lenet/scripts/run_standalone_train.sh-tpl
  33. +0
    -1
      mindinsight/wizard/conf/templates/network/lenet/scripts/run_standalone_train_gpu.sh-tpl
  34. +1
    -1
      mindinsight/wizard/conf/templates/network/lenet/src/config.py-tpl
  35. +0
    -1
      mindinsight/wizard/conf/templates/network/resnet50/scripts/run_distribute_train.sh-tpl
  36. +0
    -1
      mindinsight/wizard/conf/templates/network/resnet50/scripts/run_distribute_train_gpu.sh-tpl
  37. +0
    -1
      mindinsight/wizard/conf/templates/network/resnet50/scripts/run_eval.sh-tpl
  38. +0
    -1
      mindinsight/wizard/conf/templates/network/resnet50/scripts/run_eval_gpu.sh-tpl
  39. +0
    -1
      mindinsight/wizard/conf/templates/network/resnet50/scripts/run_standalone_train.sh-tpl
  40. +0
    -1
      mindinsight/wizard/conf/templates/network/resnet50/scripts/run_standalone_train_gpu.sh-tpl
  41. +1
    -1
      mindinsight/wizard/conf/templates/network/resnet50/src/config.py-tpl
  42. +1
    -0
      mindinsight/wizard/create_project.py

+ 1
- 1
.gitmodules View File

@@ -1,3 +1,3 @@
[submodule "third_party/securec"]
path = third_party/securec
url = https://gitee.com/openeuler/bounds_checking_function.git
url = https://gitee.com/openeuler/libboundscheck.git

+ 7
- 7
README.md View File

@@ -1,8 +1,8 @@
[简体中文](./README.md)
[简体中文](./README_CN.md)

- [Introduction](#introduction)
- [Installation](#installation)
- [QuickStart](#quickstart)
- [QuickStart](#quick-start)
- [Docs](#docs)
- [Community](#community)
- [Governance](#governance)
@@ -16,8 +16,8 @@ MindInsight provides MindSpore with easy-to-use debugging and tuning capabilitie

![MindInsight Architecture](docs/arch.png)

Click to view the [Design document](https://www.mindspore.cn/docs/en/master/design.html),learn more about the design.
Click to view the [Tutorial documentation](https://www.mindspore.cn/tutorial/en/master/advanced_use/visualization_tutorials.html) learn more about the MindInsight tutorial.
Click to view the [Design document](https://www.mindspore.cn/docs/en/r0.7/design.html) to learn more about the design.
Click to view the [Tutorial documentation](https://www.mindspore.cn/tutorial/en/r0.7/advanced_use/visualization_tutorials.html) to learn more about the MindInsight tutorial.

## Installation
Download whl package from [MindSpore download page](https://www.mindspore.cn/versions/en), and install the package.
@@ -31,7 +31,7 @@ For more details on how to install MindInsight, click on the MindInsight section
## Quick Start
Before using MindInsight, the data in the training process should be recorded. When starting MindInsight, the directory of the saved data should be specified. After successful startup, the data can be viewed through the web page. Here is a brief introduction to recording training data, as well as starting and stopping MindInsight.

[SummaryCollector](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.train.html?highlight=summarycollector#mindspore.train.callback.SummaryCollector) is the interface MindSpore provides for a quick and easy collection of common data about computational graphs, loss values, learning rates, parameter weights, and so on. Below is an example of using `SummaryCollector` for data collection, specifying the directory where the data is stored in `./summary_dir`.
[SummaryCollector](https://www.mindspore.cn/api/en/r0.7/api/python/mindspore/mindspore.train.html?highlight=summarycollector#mindspore.train.callback.SummaryCollector) is the interface MindSpore provides for a quick and easy collection of common data about computational graphs, loss values, learning rates, parameter weights, and so on. Below is an example of using `SummaryCollector` for data collection, specifying the directory where the data is stored in `./summary_dir`.
```
...

@@ -40,7 +40,7 @@ summary_collector = SummaryCollector(summary_dir='./summary_dir')
model.train(epoch=1, ds_train, callbacks=[summary_collector])
```

For more ways to record visual data, see the [MindInsight Tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/visualization_tutorials.html).
For more ways to record visual data, see the [MindInsight Tutorial](https://www.mindspore.cn/tutorial/en/r0.7/advanced_use/visualization_tutorials.html).

After you've collected the data, when you launch MindInsight, specify the directory in which the data has been stored.
```
@@ -69,7 +69,7 @@ Check out how MindSpore Open Governance [works](https://gitee.com/mindspore/comm
- Mailing-list: <https://mailweb.mindspore.cn/postorius/lists>

## Contributing
Welcome contributions. See our [Contributor Wiki](https://gitee.com/mindspore/mindspore/blob/master/CONTRIBUTING.md) for
Welcome contributions. See our [Contributor Wiki](https://gitee.com/mindspore/mindspore/blob/r0.7/CONTRIBUTING.md) for
more details.

## Release Notes


+ 6
- 6
README_CN.md View File

@@ -16,14 +16,14 @@ MindInsight为MindSpore提供了简单易用的调优调试能力。在训练过

![MindInsight Architecture](docs/arch.png)

点击查看[设计文档](https://www.mindspore.cn/docs/zh-CN/master/design.html),了解更多设计详情。
点击查看[教程文档](https://www.mindspore.cn/tutorial/zh-CN/master/advanced_use/visualization_tutorials.html),了解更多MindInsight教程。
点击查看[设计文档](https://www.mindspore.cn/docs/zh-CN/r0.7/design.html),了解更多设计详情。
点击查看[教程文档](https://www.mindspore.cn/tutorial/zh-CN/r0.7/advanced_use/visualization_tutorials.html),了解更多MindInsight教程。

## 安装
请从[MindSpore下载页面](https://www.mindspore.cn/versions)下载并安装whl包。

```
pip install mindinsight-{version}-cp37-cp37m-linux_{arch}.whl
pip install -U mindinsight-{version}-cp37-cp37m-linux_{arch}.whl
```

更多MindInsight的安装方法,请点击[安装教程](https://www.mindspore.cn/install/)中的MindInsight章节进行查看。
@@ -32,7 +32,7 @@ pip install mindinsight-{version}-cp37-cp37m-linux_{arch}.whl
使用MindInsight前,需要先将训练过程中的数据记录下来,启动MindInsight时,指定所保存的数据的位置,启动成功后,
即可通过可视化页面查看数据。下面将简单介绍记录训练过程数据,以及启动、停止MindInsight服务。

[SummaryCollector](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.train.html?highlight=summarycollector#mindspore.train.callback.SummaryCollector)是MindSpore提供的快速简易地收集一些常见信息的接口,收集的信息包括计算图、损失值、学习率、参数权重等。
[SummaryCollector](https://www.mindspore.cn/api/zh-CN/r0.7/api/python/mindspore/mindspore.train.html?highlight=summarycollector#mindspore.train.callback.SummaryCollector)是MindSpore提供的快速简易地收集一些常见信息的接口,收集的信息包括计算图、损失值、学习率、参数权重等。
下面是使用 `SummaryCollector` 进行数据收集的示例,其中指定存放数据的目录为 `./summary_dir`。
```
...
@@ -42,7 +42,7 @@ summary_collector = SummaryCollector(summary_dir='./summary_dir')
model.train(epoch=1, ds_train, callbacks=[summary_collector])
```

更多记录可视化数据的方法,请点击查看[MindInsight使用教程](https://www.mindspore.cn/tutorial/zh-CN/master/advanced_use/visualization_tutorials.html)。
更多记录可视化数据的方法,请点击查看[MindInsight使用教程](https://www.mindspore.cn/tutorial/zh-CN/r0.7/advanced_use/visualization_tutorials.html)。

收集好数据后,启动MindInsight时指定存放数据的目录。
```
@@ -70,7 +70,7 @@ mindinsight stop
- 邮件列表:<https://mailweb.mindspore.cn/postorius/lists>

## 贡献
欢迎参与贡献。更多详情,请参阅我们的[贡献者Wiki](https://gitee.com/mindspore/mindspore/blob/master/CONTRIBUTING.md)。
欢迎参与贡献。更多详情,请参阅我们的[贡献者Wiki](https://gitee.com/mindspore/mindspore/blob/r0.7/CONTRIBUTING.md)。

## 版本说明
版本说明请参阅[RELEASE](RELEASE.md)。


+ 45
- 0
mindinsight/backend/config/gunicorn_conf.py View File

@@ -15,9 +15,13 @@
"""Config file for gunicorn."""

import os
import multiprocessing
import signal
import threading
import time
from importlib import import_module

import psutil
import gunicorn


@@ -43,3 +47,44 @@ def on_starting(server):
hook_module = import_module('mindinsight.utils.hook')
for hook in hook_module.HookUtils.instance().hooks():
threading.Thread(target=hook.on_startup, args=(server.log,)).start()


def post_fork(server, worker):
    """
    Launch a listener process that cleans up after a killed gunicorn worker.

    Children processes of gunicorn worker should be killed when worker has been killed
    because gunicorn master murders this worker for some reasons such as worker timeout.

    Args:
        server (Arbiter): gunicorn server instance.
        worker (ThreadWorker): worker instance.
    """
    def murder_worker_children_processes():
        """Wait until the worker is no longer our parent, then SIGKILL its recorded children."""
        # sleep 3 seconds so that all worker children processes have been launched.
        time.sleep(3)
        try:
            process = psutil.Process(worker.pid)
            # This listener is itself a descendant of the worker, so leave it out.
            processes_to_kill = [child for child in process.children(recursive=True)
                                 if child.pid != os.getpid()]
        except psutil.Error as ex:
            # The worker may already be gone after the sleep above; without it the
            # children cannot be enumerated, so stop instead of crashing the listener.
            server.log.error("Get children processes of worker %d failed. Detail: %s.",
                             worker.pid, str(ex))
            return

        while True:
            # A changed parent pid means the original worker no longer exists.
            if os.getppid() != worker.pid:
                current_worker_pid = os.getppid()
                for proc in processes_to_kill:
                    server.log.info("Original worker pid: %d, current worker pid: %d, stop process %d",
                                    worker.pid, current_worker_pid, proc.pid)
                    try:
                        proc.send_signal(signal.SIGKILL)
                    except psutil.NoSuchProcess:
                        # Already exited on its own; nothing left to kill.
                        continue
                    except psutil.Error as ex:
                        server.log.error("Stop process %d failed. Detail: %s.", proc.pid, str(ex))
                server.log.info("%d processes have been killed.", len(processes_to_kill))
                break
            time.sleep(1)

    listen_process = multiprocessing.Process(target=murder_worker_children_processes,
                                             name="murder_worker_children_processes")
    listen_process.start()
    server.log.info("Server pid: %d, start to listening.", server.pid)

+ 1
- 1
mindinsight/backend/run.py View File

@@ -209,7 +209,7 @@ class GunicornLogger(Logger):
super(GunicornLogger, self).__init__(cfg)

def now(self):
"""return the log format"""
"""Get log format."""
return time.strftime('[%Y-%m-%d-%H:%M:%S %z]')

def setup(self, cfg):


+ 4
- 6
mindinsight/datavisual/data_transform/ms_data_loader.py View File

@@ -296,6 +296,7 @@ class _SummaryParser(_Parser):
self._load_single_file(self._summary_file_handler, executor)
# Wait for data in this file to be processed to avoid loading multiple files at the same time.
executor.wait_all_tasks_finish()
logger.info("Parse summary file finished, file path: %s.", file_path)
except UnknownError as ex:
logger.warning("Parse summary file failed, detail: %r,"
"file path: %s.", str(ex), file_path)
@@ -383,7 +384,7 @@ class _SummaryParser(_Parser):
# read the header
header_str = file_handler.read(HEADER_SIZE)
if not header_str:
logger.info("End of file, file_path=%s.", file_handler.file_path)
logger.info("Load summary file finished, file_path=%s.", file_handler.file_path)
return None
header_crc_str = file_handler.read(CRC_STR_SIZE)
if not header_crc_str:
@@ -441,12 +442,9 @@ class _SummaryParser(_Parser):

elif plugin == PluginNameEnum.TENSOR.value:
tensor_event_value = TensorContainer(tensor_event_value)
tensor_count = 1
for d in tensor_event_value.dims:
tensor_count *= d
if tensor_count > MAX_TENSOR_COUNT:
if tensor_event_value.size > MAX_TENSOR_COUNT:
logger.warning('tag: %s/tensor, dims: %s, tensor count: %d exceeds %d and drop it.',
value.tag, tensor_event_value.dims, tensor_count, MAX_TENSOR_COUNT)
value.tag, tensor_event_value.dims, tensor_event_value.size, MAX_TENSOR_COUNT)
return None

elif plugin == PluginNameEnum.IMAGE.value:


+ 34
- 38
mindinsight/datavisual/data_transform/tensor_container.py View File

@@ -13,12 +13,11 @@
# limitations under the License.
# ============================================================================
"""Tensor data container."""
import threading

import numpy as np

from mindinsight.datavisual.common.log import logger
from mindinsight.datavisual.data_transform.histogram import Histogram, Bucket
from mindinsight.datavisual.proto_files import mindinsight_anf_ir_pb2 as anf_ir_pb2
from mindinsight.datavisual.utils.utils import calc_histogram_bins
from mindinsight.utils.exceptions import ParamValueError

@@ -139,19 +138,6 @@ def get_statistics_from_tensor(tensors):
return statistics


def _get_data_from_tensor(tensor):
"""
Get data from tensor and convert to tuple.

Args:
tensor (TensorProto): Tensor proto data.

Returns:
tuple, the item of tensor value.
"""
return tuple(tensor.float_data)


def calc_original_buckets(np_value, stats):
"""
Calculate buckets from tensor data.
@@ -199,19 +185,24 @@ class TensorContainer:
"""

def __init__(self, tensor_message):
self._lock = threading.Lock
# Original dims can not be pickled to transfer to other process, so tuple is used.
self._dims = tuple(tensor_message.dims)
self._data_type = tensor_message.data_type
self._np_array = None
self._data = _get_data_from_tensor(tensor_message)
self._stats = get_statistics_from_tensor(self.get_or_calc_ndarray())
original_buckets = calc_original_buckets(self.get_or_calc_ndarray(), self._stats)
self._np_array = self.get_ndarray(tensor_message.float_data)
self._stats = get_statistics_from_tensor(self._np_array)
original_buckets = calc_original_buckets(self._np_array, self._stats)
self._count = sum(bucket.count for bucket in original_buckets)
self._max = self._stats.max
self._min = self._stats.min
# convert the type of max and min value to np.float64 so that it cannot overflow
# when calculating width of histogram.
self._max = np.float64(self._stats.max)
self._min = np.float64(self._stats.min)
self._histogram = Histogram(tuple(original_buckets), self._max, self._min, self._count)

@property
def size(self):
"""Get size of tensor."""
return self._np_array.size

@property
def dims(self):
"""Get dims of tensor."""
@@ -222,6 +213,11 @@ class TensorContainer:
"""Get data type of tensor."""
return self._data_type

@property
def ndarray(self):
"""Get ndarray of tensor."""
return self._np_array

@property
def max(self):
"""Get max value of tensor."""
@@ -251,19 +247,19 @@ class TensorContainer:
"""Get histogram buckets."""
return self._histogram.buckets()

def get_or_calc_ndarray(self):
"""Get or calculate ndarray."""
with self._lock():
if self._np_array is None:
self._convert_to_numpy_array()
return self._np_array
def _convert_to_numpy_array(self):
"""Convert a list data to numpy array."""
try:
ndarray = np.array(self._data).reshape(self._dims)
except ValueError as ex:
logger.error("Reshape array fail, detail: %r", str(ex))
return
self._np_array = ndarray
def get_ndarray(self, tensor):
    """
    Get ndarray of tensor.

    Args:
        tensor (Iterable): flat sequence of tensor values; the constructor passes
            the ``float_data`` field of the tensor message.

    Returns:
        numpy.ndarray, ndarray of tensor, reshaped to ``self.dims``.
    """
    data_type_str = anf_ir_pb2.DataType.Name(self.data_type)
    # Only float16/float32 are given an explicit dtype; for any other data type
    # the lookup yields None, which makes numpy.array infer its default dtype.
    dtype = {'DT_FLOAT16': np.float16, 'DT_FLOAT32': np.float32}.get(data_type_str)
    return np.array(tuple(tensor), dtype=dtype).reshape(self.dims)

+ 5
- 6
mindinsight/datavisual/processors/tensor_processor.py View File

@@ -99,9 +99,9 @@ def get_statistics_dict(stats):
dict, a dict including 'max', 'min', 'avg', 'count', 'nan_count', 'neg_inf_count', 'pos_inf_count'.
"""
statistics = {
"max": stats.max,
"min": stats.min,
"avg": stats.avg,
"max": float(stats.max),
"min": float(stats.min),
"avg": float(stats.avg),
"count": stats.count,
"nan_count": stats.nan_count,
"neg_inf_count": stats.neg_inf_count,
@@ -302,8 +302,7 @@ class TensorProcessor(BaseProcessor):
if step != tensor.step:
continue
step_in_cache = True
ndarray = value.get_or_calc_ndarray()
res_data = get_specific_dims_data(ndarray, dims, list(value.dims))
res_data = get_specific_dims_data(value.ndarray, dims, list(value.dims))
flatten_data = res_data.flatten().tolist()
if len(flatten_data) > MAX_TENSOR_RESPONSE_DATA_SIZE:
raise ResponseDataExceedMaxValueError("the size of response data: {} exceed max value: {}."
@@ -326,7 +325,7 @@ class TensorProcessor(BaseProcessor):
elif np.isposinf(data):
transfer_data[index] = 'INF'
else:
transfer_data[index] = data
transfer_data[index] = float(data)
return transfer_data

stats = get_statistics_from_tensor(res_data)


+ 10
- 7
mindinsight/ui/src/locales/en-us.json View File

@@ -49,7 +49,8 @@
"dataTraceback": "Dataset Lineage",
"comparePlate": "Comparison Dashboard",
"disableProfilerTip": "Failed to view profiling because no profiler log is available.",
"hardwareVisual": "Hardware Resources"
"hardwareVisual": "Hardware Resources",
"openNewTab": "Open Link in New Tab"
},
"modelTraceback": {
"summaryPath": "Summary Path",
@@ -231,10 +232,10 @@
"card": " ",
"searchByType": "Enter operator type",
"searchByName": "Enter operator name",
"operatorInfo":"Operator",
"kernelInfo":"Kernel",
"searchByCoreName":"Enter kernel name",
"searchByCoreFullName":"Enter operator full name"
"operatorInfo": "Operator",
"kernelInfo": "Kernel",
"searchByCoreName": "Enter kernel name",
"searchByCoreFullName": "Enter operator full name"
},
"profiling": {
"profilingDashboard": "Profiling Dashboard",
@@ -391,8 +392,10 @@
"content31": "You can analyze whether the flow tiling policy is proper and whether the step interval and tail time are too long based on the timeline information.",
"content32": "You can also locate an operator and view and analyze its execution time."
},
"countUnit": "times",
"unit": "ms/time",
"gpuunit": "us/time"
"gpuunit": "us/time",
"chartTitle": "Average Time Consumption Ranking"
},
"hardwareVisual": {
"processor": "Ascend AI Processor",
@@ -448,7 +451,7 @@
"gridAccuracy": "Decimal places are reserved.",
"inCorrectInput": "Invalid input.",
"gridTableNoData": "No data in the table.",
"cache":"CACHING"
"cache": "CACHING"
},
"error": {
"50540000": "System error.",


+ 14
- 11
mindinsight/ui/src/locales/zh-cn.json View File

@@ -49,7 +49,8 @@
"dataTraceback": "数据溯源",
"comparePlate": "对比看板",
"disableProfilerTip": "无profiler日志,无法查看性能分析",
"hardwareVisual": "硬件资源"
"hardwareVisual": "硬件资源",
"openNewTab": "打开新页签"
},
"modelTraceback": {
"summaryPath": "训练日志路径",
@@ -390,8 +391,10 @@
"content31": "您可以通过时间线信息分析流切分方法是否合理、迭代间隙和拖尾时间是否过长等;",
"content32": "也可以具体定位到某个算子,查看分析它的执行时间。"
},
"countUnit": "次",
"unit": "ms/次",
"gpuunit": "us/次"
"gpuunit": "us/次",
"chartTitle": "平均耗时排行"
},
"hardwareVisual": {
"processor": "昇腾AI处理器",
@@ -427,14 +430,14 @@
"availableFree": "芯片空闲",
"availableBusy": "芯片已被占用或不可用",
"failQueryChip": "芯片信息查询有误",
"name":"名称",
"npu":"编号",
"available":"是否空闲",
"health":"健康状态",
"ipAddress":"IP 地址",
"hbmUsage":"已用HBM内存(MB)",
"power":"功率(W)",
"temp":"温度(℃)"
"name": "名称",
"npu": "编号",
"available": "是否空闲",
"health": "健康状态",
"ipAddress": "IP 地址",
"hbmUsage": "已用HBM内存(MB)",
"power": "功率(W)",
"temp": "温度(℃)"
},
"components": {
"summaryTitle": "训练选择",
@@ -447,7 +450,7 @@
"gridAccuracy": "保留小数位",
"inCorrectInput": "无效输入",
"gridTableNoData": "表格无数据",
"cache":"正在加载"
"cache": "正在加载"
},
"error": {
"50540000": "系统错误",


+ 1
- 1
mindinsight/ui/src/store.js View File

@@ -29,7 +29,7 @@ export default new Vuex.Store({
// reload time
timeReloadValue: localStorage.timeReloadValue
? localStorage.timeReloadValue
: 3,
: 10,
// Scheduled hardware reload flag
isHardwareTimeReload: localStorage.isHardwareTimeReload === 'false' ? false : true,
// hardware reload time


+ 272
- 213
mindinsight/ui/src/views/profiling-gpu/operator.vue View File

@@ -25,6 +25,7 @@ limitations under the License.
:class="{fullScreen:fullScreen}"
v-if="operatorCharts.data.length">
<div>
<div class="chart-title">{{$t('profiling.chartTitle')}}</div>
<el-radio-group class="chart-radio-group"
v-model="operatorCharts.type"
@change="operatorChartChange"
@@ -99,13 +100,14 @@ limitations under the License.
:property="ele"
:key="key"
:sortable="ele === 'op_info' ? false : 'custom'"
show-overflow-tooltip>
:min-width="(ele === 'op_type') ? 100 : (ele === 'op_name') ?
120 : (ele === 'op_full_name') ? 150 : '' "
:show-overflow-tooltip="(ele === 'op_full_name'||ele === 'op_name'
||ele==='op_type') ? false : true">
<template slot="header">
<div class="custom-label"
:title="(ele==='op_total_time'||ele==='op_avg_time'||ele==='cuda_activity_cost_time')
?`${ele} (${$t('profiling.gpuunit')})`:ele">
{{(ele==='op_total_time'||ele==='op_avg_time'||ele==='cuda_activity_cost_time')
?`${ele} (${$t('profiling.gpuunit')})`:ele}}
:title="getHeaderField(ele)">
{{getHeaderField(ele)}}
</div>
</template>
</el-table-column>
@@ -126,8 +128,8 @@ limitations under the License.
sortable>
<template slot="header">
<div class="custom-label"
:title="(item==='total_time'||item==='avg_time')?`${item} (${$t('profiling.gpuunit')})`:item">
{{(item==='total_time'||item==='avg_time')?`${item} (${$t('profiling.gpuunit')})`:item}}
:title="getHeaderField(item)">
{{getHeaderField(item)}}
</div>
</template>
</el-table-column>
@@ -144,13 +146,14 @@ limitations under the License.
:property="item"
:key="$index"
:sortable="item === 'op_info' ? false : 'custom'"
show-overflow-tooltip>
:min-width="(item === 'op_type') ? 100 : (item === 'op_name')
? 120 : (item === 'op_full_name') ? 150 : '' "
:show-overflow-tooltip="(item === 'op_full_name' || item === 'op_name'
|| item === 'op_type') ? false : true">
<template slot="header">
<div class="custom-label"
:title="(item==='op_total_time'||item==='op_avg_time'||item==='cuda_activity_cost_time')
?`${item} (${$t('profiling.gpuunit')})`:item">
{{(item==='op_total_time'||item==='op_avg_time'||item==='cuda_activity_cost_time')
?`${item} (${$t('profiling.gpuunit')})`:item}}
:title="getHeaderField(item)">
{{getHeaderField(item)}}
</div>
</template>
</el-table-column>
@@ -180,6 +183,7 @@ limitations under the License.
:class="{fullScreen:fullScreenKernel}"
v-if="coreCharts.data.length">
<div>
<div class="chart-title">{{$t('profiling.chartTitle')}}</div>
</div>
<div class="cl-profiler-echarts">
<div class
@@ -225,13 +229,14 @@ limitations under the License.
:property="item"
:key="$index"
sortable="custom"
show-overflow-tooltip>
:min-width="(item === 'type') ? 100 : (item === 'name' || item === 'op_full_name')
? 150 : '' "
:show-overflow-tooltip="(item === 'op_full_name' || item === 'name'
||item === 'type') ? false : true">
<template slot="header">
<div class="custom-label"
:title="(item==='total_duration'||item==='avg_duration'||item==='max_duration'
|| item==='min_duration')?`${item} (${$t('profiling.gpuunit')})`:item">
{{(item==='total_duration'||item==='avg_duration'||item==='max_duration'||item==='min_duration')
?`${item} (${$t('profiling.gpuunit')})`:item}}
:title="getHeaderField(item)">
{{getHeaderField(item)}}
</div>
</template>
</el-table-column>
@@ -363,6 +368,29 @@ export default {
this.$bus.$off('collapse');
},
methods: {
/**
 * Get the table header text for a column key
 * Known keys map to the key name followed by a unit or count suffix;
 * any key without an entry in the map is returned unchanged.
 * @param {String} key Column key
 * @return {String} Header text
 */
getHeaderField(key) {
// Some suffixes are literal strings, others come from the i18n entries
// 'profiling.gpuunit' and 'profiling.countUnit'.
const maps = {
total_time: 'total_time (us)',
avg_time: `avg_time (${this.$t('profiling.gpuunit')})`,
op_total_time: 'op_total_time (us)',
op_avg_time: `op_avg_time (${this.$t('profiling.gpuunit')})`,
max_duration: 'max_duration (us)',
min_duration: 'min_duration (us)',
avg_duration: 'avg_duration (us)',
total_duration: 'total_duration (us)',
proportion: 'total_time_proportion (%)',
cuda_activity_cost_time: 'cuda_activity_cost_time (us)',
cuda_activity_call_count: `cuda_activity_call_count (${this.$t(
'profiling.countUnit',
)})`,
type_occurrences: `type_occurrences (${this.$t(
'profiling.countUnit',
)})`,
op_occurrences: `op_occurrences (${this.$t('profiling.countUnit')})`,
occurrences: `occurrences (${this.$t('profiling.countUnit')})`,
};
// Fall back to the raw key when no header text is defined for it.
return maps[key] ? maps[key] : key;
},
resizeEchart() {
if (this.operatorCharts.chartDom) {
setTimeout(() => {
@@ -400,7 +428,16 @@ export default {
this.getCoreList(true);
}
},
opTypeSortChange() {
/**
* Operators type sort
* @param {Object} sort Sort data
*/
opTypeSortChange(sort) {
this.op_sort_condition = {
name: sort.prop,
type: sort.order,
};

this.$nextTick(() => {
const item = this.$refs['expandChild'];
if (item && this.curActiveRow.rowItem) {
@@ -856,6 +893,8 @@ export default {
setOption(chart) {
const option = {};
const maxLabelLength = 20;
const maxTooltipLen = 50;

if (!chart.type) {
option.legend = {
data: [],
@@ -871,17 +910,25 @@ export default {
: chart.data[i].name;
legendStr = `{a|${i + 1}}{b|${name} ${chart.data[
i
].value.toFixed(3)}}\n{c|${
chart.data[i].percent
? chart.data[i].percent.toFixed(2) + '%'
: ''
}}`;
].value.toFixed(3)}}\n{c|}`;
}
}
return legendStr;
},
tooltip: {
show: true,
formatter: (params) => {
let name = params.name;
name = name.replace(/</g, '< ');

const breakCount = Math.ceil(name.length / maxTooltipLen);
let str = '';
for (let i = 0; i < breakCount; i++) {
const temp = name.substr(i * maxTooltipLen, maxTooltipLen);
str += str ? '<br/>' + temp : temp;
}
return str;
},
},
itemWidth: 18,
itemHeight: 18,
@@ -912,7 +959,16 @@ export default {
option.tooltip = {
trigger: 'item',
formatter: (params) => {
return `${params.marker} ${params.data.name} ${params.percent}%`;
const name = params.data.name.replace(/</g, '< ');
const strTemp = `${name} ${params.percent.toFixed(2) + '%'}`;

const breakCount = Math.ceil(strTemp.length / maxTooltipLen);
let str = '';
for (let i = 0; i < breakCount; i++) {
const temp = strTemp.substr(i * maxTooltipLen, maxTooltipLen);
str += str ? '<br/>' + temp : temp;
}
return str;
},
confine: true,
};
@@ -1049,225 +1105,228 @@ export default {
<style lang="scss">
.operator {
height: 100%;
}
.clear {
clear: both;
}
.el-tabs__item {
color: #6c7280;
font-size: 16px;
line-height: 36px;
height: 36px;
}
.el-tabs__item.is-active {
color: #00a5a7;
font-weight: bold;
}
.operator-title {
padding: 0 15px;
font-size: 16px;
font-weight: bold;
}
.cl-profiler {
height: calc(100% - 21px);
overflow-y: auto;
width: 100%;
background: #fff;
padding: 0 16px;
overflow: hidden;
.custom-label {
max-width: calc(100% - 25px);
padding: 0;
vertical-align: middle;
}
.el-tabs {
height: 100%;
.el-tabs__header {
margin-bottom: 10px;
}
}
.el-tabs__content {
height: calc(100% - 46px);
}
.el-tab-pane {
height: 100%;
.clear {
clear: both;
}
.cl-search-box {
float: right;
margin-bottom: 10px;
margin-right: 20px;
.el-tabs__item {
color: #6c7280;
line-height: 36px;
height: 36px;
}
.cl-profiler-top {
height: 45%;
.el-tabs__item.is-active {
color: #00a5a7;
font-weight: bold;
}
.cl-profiler-top.fullScreen {
display: none;
}
.cl-profiler-bottom {
height: 55%;
padding-top: 10px;
.fullScreen {
float: right;
margin-top: 5px;
cursor: pointer;
}
.operator-title {
padding: 0 15px;
font-size: 16px;
font-weight: bold;
}
.cl-profiler-bottom.fullScreen {
height: 100%;
}
.core-search-type {
float: right;
width: 130px;
margin-right: 10px;
}
.cl-profiler-echarts {
.cl-profiler {
height: calc(100% - 21px);
overflow-y: auto;
width: 100%;
height: calc(100% - 32px);
display: inline-block;
position: relative;
overflow: auto;
#core-echarts,
#operator-echarts {
width: 100%;
background: #fff;
padding: 0 16px;
overflow: hidden;
.custom-label {
max-width: calc(100% - 25px);
padding: 0;
vertical-align: middle;
}
.el-tabs {
height: 100%;
min-width: 1300px;
min-height: 306px;
overflow: hidden;
.el-tabs__header {
margin-bottom: 10px;
}
}
.el-tabs__content {
height: calc(100% - 46px);
}
.el-tab-pane {
height: 100%;
}
.cl-search-box {
float: right;
margin-bottom: 10px;
margin-right: 20px;
}
}
.core-tab {
.cl-profiler-top {
height: calc(45% - 40px);
height: 45%;
.chart-title {
float: left;
font-weight: bold;
}
}
.cl-profiler-bottom {
height: calc(55% + 40px);
.cl-profiler-top.fullScreen {
display: none;
}
.cl-profiler-echarts {
height: 100%;
.cl-profiler-bottom {
height: 55%;
padding-top: 10px;
.fullScreen {
float: right;
margin-top: 5px;
cursor: pointer;
}
}
.cl-profiler-bottom.fullScreen {
height: 100%;
}
}
.chart-radio-group {
float: right;
}
.el-radio-group {
.el-radio-button--small .el-radio-button__inner {
height: 30px;
width: 70px;
font-size: 14px;
line-height: 10px;
.core-search-type {
float: right;
width: 130px;
margin-right: 10px;
}
}
.cl-profiler-bar {
display: inline-block;
width: calc(100% - 400px);
vertical-align: top;
height: 100%;
padding: 20px;
}
.cl-profiler-table-type {
display: inline-block;
width: calc(100% - 400px);
vertical-align: top;
height: 100%;
}
.el-pagination {
margin: 7px 0;
float: right;
}
.details-data-list {
.el-table {
th {
padding: 10px 0;
border-top: 1px solid #ebeef5;
.cell {
border-left: 1px solid #d9d8dd;
height: 14px;
line-height: 14px;
}
.cl-profiler-echarts {
width: 100%;
height: calc(100% - 32px);
display: inline-block;
position: relative;
overflow: auto;
#core-echarts,
#operator-echarts {
width: 100%;
height: 100%;
min-width: 1300px;
min-height: 306px;
overflow: hidden;
}
th:first-child {
.cell {
border-left: none;
}
}
.core-tab {
.cl-profiler-top {
height: 45%;
}
th:nth-child(2),
td:nth-child(2) {
max-width: 30%;
.cl-profiler-bottom {
height: 55%;
}
td {
padding: 8px 0;
.cl-profiler-echarts {
height: calc(100% - 32px);
}
.cl-profiler-bottom.fullScreen {
height: 100%;
}
}
.el-table__row--level-0 td:first-child:after {
width: 20px;
height: 1px;
background: #ebeef5;
z-index: 11;
position: absolute;
left: 0;
bottom: -1px;
content: '';
display: block;
.chart-radio-group {
float: right;
}
.el-table__row--level-1 {
td {
padding: 4px 0;
position: relative;
.el-radio-group {
.el-radio-button--small .el-radio-button__inner {
height: 30px;
width: 70px;
font-size: 14px;
line-height: 10px;
}
td:first-child::before {
width: 42px;
background: #f0fdfd;
border-right: 2px #00a5a7 solid;
z-index: 10;
}
.cl-profiler-bar {
display: inline-block;
width: calc(100% - 400px);
vertical-align: top;
height: 100%;
padding: 20px;
}
.cl-profiler-table-type {
display: inline-block;
width: calc(100% - 400px);
vertical-align: top;
height: 100%;
}
.el-pagination {
margin: 7px 0;
float: right;
}
.details-data-list {
.el-table {
th {
padding: 10px 0;
border-top: 1px solid #ebeef5;
.cell {
border-left: 1px solid #d9d8dd;
height: 14px;
line-height: 14px;
}
}
th:first-child {
.cell {
border-left: none;
}
}
th:nth-child(2),
td:nth-child(2) {
max-width: 30%;
}
td {
padding: 8px 0;
}
}
.el-table__row--level-0 td:first-child:after {
width: 20px;
height: 1px;
background: #ebeef5;
z-index: 11;
position: absolute;
left: 0;
top: -1px;
bottom: 0px;
bottom: -1px;
content: '';
display: block;
}
}
.el-table__row--level-1 {
td {
padding: 4px 0;
position: relative;
}
td:first-child::before {
width: 42px;
background: #f0fdfd;
border-right: 2px #00a5a7 solid;
z-index: 10;
position: absolute;
left: 0;
top: -1px;
bottom: 0px;
content: '';
display: block;
}
}

.el-table__row--level-1:first-child {
td:first-child::before {
bottom: 0;
.el-table__row--level-1:first-child {
td:first-child::before {
bottom: 0;
}
}
}
}
.el-table__expanded-cell[class*='cell'] {
padding: 0;
}
.expand-table {
position: relative;
padding-left: 44px;
}
.expand-table::before {
content: '';
position: absolute;
left: 0;
top: 0;
height: 100%;
background: #f0fdfd;
width: 42px;
border-right: 2px #00a5a7 solid;
}
.el-radio-button:last-child .el-radio-button__inner,
.el-radio-button:first-child .el-radio-button__inner {
border-radius: 0;
}
.image-noData {
width: 100%;
height: 100%;
display: flex;
justify-content: center;
align-items: center;
flex-direction: column;
p {
font-size: 16px;
padding-top: 10px;
.el-table__expanded-cell[class*='cell'] {
padding: 0;
}
.expand-table {
position: relative;
padding-left: 44px;
}
.expand-table::before {
content: '';
position: absolute;
left: 0;
top: 0;
height: 100%;
background: #f0fdfd;
width: 42px;
border-right: 2px #00a5a7 solid;
}
.el-radio-button:last-child .el-radio-button__inner,
.el-radio-button:first-child .el-radio-button__inner {
border-radius: 0;
}
.image-noData {
width: 100%;
height: 100%;
display: flex;
justify-content: center;
align-items: center;
flex-direction: column;
p {
font-size: 16px;
padding-top: 10px;
}
}
}
}


+ 21
- 0
mindinsight/ui/src/views/profiling-gpu/profiling.vue View File

@@ -20,6 +20,10 @@ limitations under the License.
:class="{collapse:collapse}">
<div class="helper"
v-show="!collapse">
<div class="summary-path">
{{$t('trainingDashboard.summaryDirPath')}}
<span>{{ summaryPath}}</span>
</div>
<div class="cur-card">
<label>{{$t('profiling.curCard')}}</label>
<el-select v-model="curDashboardInfo.curCardNum"
@@ -61,6 +65,7 @@ import RequestService from '../../services/request-service';
export default {
data() {
return {
summaryPath: '',
tipsArrayList: [
'step_trace-iter_interval',
'minddata_pipeline-general',
@@ -95,6 +100,7 @@ export default {
this.curDashboardInfo.query.id = this.$route.query.id;
this.curDashboardInfo.query.dir = this.$route.query.dir;
this.curDashboardInfo.query.path = this.$route.query.path;
this.summaryPath = decodeURIComponent( this.$route.query.id);
this.getDeviceList();
} else {
this.curDashboardInfo.query.trainingJobId = '';
@@ -330,11 +336,26 @@ export default {
}
.helper {
padding: 32px;
padding-top: 20px;
height: 100%;
overflow-y: auto;
margin-left: 24px;
background: #edf0f5;
word-wrap: break-word;
.summary-path {
line-height: 24px;
font-size: 14px;
overflow: hidden;
font-weight: bold;
padding-bottom: 10px;
word-break: break-all;
text-overflow: -o-ellipsis-lastline;
overflow: hidden;
text-overflow: ellipsis;
display: -webkit-box;
-webkit-line-clamp: 4;
-webkit-box-orient: vertical;
}
.nowrap-style {
white-space: nowrap;
}


+ 5
- 1
mindinsight/ui/src/views/profiling/data-process.vue View File

@@ -1293,7 +1293,7 @@ export default {
.md-wrap {
height: 100%;
background: #fff;
padding: 0 32px;
padding: 0 16px;
.title {
font-size: 16px;
font-weight: bold;
@@ -1308,6 +1308,10 @@ export default {
height: 100%;
}
}
.el-tabs__item.is-active {
color: #00a5a7;
font-weight: bold;
}
.md-top {
height: 20%;
font-size: 0;


+ 248
- 202
mindinsight/ui/src/views/profiling/operator.vue View File

@@ -25,6 +25,7 @@ limitations under the License.
:class="{fullScreen:fullScreen}"
v-if="coreCharts.data.length">
<div>
<div class="chart-title">{{$t('profiling.chartTitle')}}</div>
<el-radio-group class="chart-radio-group"
v-model="coreCharts.type"
@change="coreChartChange"
@@ -105,8 +106,8 @@ limitations under the License.
show-overflow-tooltip>
<template slot="header">
<div class="custom-label"
:title="ele==='avg_execution_time'?`${ele} (${$t('profiling.unit')})`:ele">
{{ele==='avg_execution_time'?`${ele} (${$t('profiling.unit')})`:ele}}
:title="getHeaderField(ele)">
{{getHeaderField(ele)}}
</div>
</template>
</el-table-column>
@@ -127,8 +128,8 @@ limitations under the License.
sortable>
<template slot="header">
<div class="custom-label"
:title="item==='execution_time'?`${item} (${$t('profiling.unit')})`:item">
{{item==='execution_time'?`${item} (${$t('profiling.unit')})`:item}}
:title="getHeaderField(item)">
{{getHeaderField(item)}}
</div>
</template>
</el-table-column>
@@ -151,8 +152,8 @@ limitations under the License.
show-overflow-tooltip>
<template slot="header">
<div class="custom-label"
:title="item==='avg_execution_time'?`${item} (${$t('profiling.unit')})`:item">
{{item==='avg_execution_time'?`${item} (${$t('profiling.unit')})`:item}}
:title="getHeaderField(item)">
{{getHeaderField(item)}}
</div>
</template>
</el-table-column>
@@ -209,10 +210,8 @@ limitations under the License.
show-overflow-tooltip>
<template slot="header">
<div class="custom-label"
:title="(item==='total_time' || item==='dispatch_time')?
`${item} (${$t('profiling.unit')})`:item">
{{(item==='total_time' || item==='dispatch_time')?
`${item} (${$t('profiling.unit')})`:item}}
:title="getHeaderField(item)">
{{getHeaderField(item)}}
</div>
</template>
</el-table-column>
@@ -359,6 +358,19 @@ export default {
this.$bus.$off('collapse');
},
methods: {
getHeaderField(key) {
const maps = {
execution_time: `execution_time (${this.$t('profiling.unit')})`,
avg_execution_time: `avg_execution_time (${this.$t('profiling.unit')})`,
execution_frequency: `execution_frequency (${this.$t(
'profiling.countUnit',
)})`,
percent: 'percent (%)',
total_time: 'total_time (ms)',
dispatch_time: 'dispatch_time (ms)',
};
return maps[key] ? maps[key] : key;
},
resizeEchart() {
if (this.coreCharts.chartDom) {
setTimeout(() => {
@@ -387,7 +399,15 @@ export default {
this.getCpuList(true);
}
},
opTypeSortChange() {
/**
* Operators type sort
* @param {Object} sort Sort data
*/
opTypeSortChange(sort) {
this.op_sort_condition = {
name: sort.prop,
type: sort.order,
};
this.$nextTick(() => {
const item = this.$refs['expandChild'];
if (item && this.curActiveRow.rowItem) {
@@ -837,6 +857,8 @@ export default {
setOption(chart) {
const option = {};
const maxLabelLength = 20;
const maxTooltipLen = 50;

if (!chart.type) {
option.legend = {
data: [],
@@ -859,6 +881,18 @@ export default {
},
tooltip: {
show: true,
formatter: (params) => {
let name = params.name;
name = name.replace(/</g, '< ');

const breakCount = Math.ceil(name.length / maxTooltipLen);
let str = '';
for (let i = 0; i < breakCount; i++) {
const temp = name.substr(i * maxTooltipLen, maxTooltipLen);
str += str ? '<br/>' + temp : temp;
}
return str;
},
},
itemWidth: 18,
itemHeight: 18,
@@ -889,7 +923,16 @@ export default {
option.tooltip = {
trigger: 'item',
formatter: (params) => {
return `${params.marker} ${params.data.name} ${params.percent}%`;
const name = params.data.name.replace(/</g, '< ');
const strTemp = `${name} ${params.percent.toFixed(2) + '%'}`;

const breakCount = Math.ceil(strTemp.length / maxTooltipLen);
let str = '';
for (let i = 0; i < breakCount; i++) {
const temp = strTemp.substr(i * maxTooltipLen, maxTooltipLen);
str += str ? '<br/>' + temp : temp;
}
return str;
},
confine: true,
};
@@ -1101,225 +1144,228 @@ export default {
<style lang="scss">
.operator {
height: 100%;
}
.clear {
clear: both;
}
.el-tabs__item {
color: #6c7280;
font-size: 16px;
line-height: 36px;
height: 36px;
}
.el-tabs__item.is-active {
color: #00a5a7;
font-weight: bold;
}
.operator-title {
padding: 0 15px;
font-size: 16px;
font-weight: bold;
}
.cl-profiler {
height: calc(100% - 21px);
overflow-y: auto;
width: 100%;
background: #fff;
padding: 0 16px;
overflow: hidden;
.custom-label {
max-width: calc(100% - 25px);
padding: 0;
vertical-align: middle;
}
.el-tabs {
height: 100%;
.el-tabs__header {
margin-bottom: 10px;
}
}
.el-tabs__content {
height: calc(100% - 46px);
}
.el-tab-pane {
height: 100%;
.clear {
clear: both;
}
.cl-search-box {
float: right;
margin-bottom: 10px;
margin-right: 20px;
.el-tabs__item {
color: #6c7280;
line-height: 36px;
height: 36px;
}
.cl-profiler-top {
height: 45%;
.el-tabs__item.is-active {
color: #00a5a7;
font-weight: bold;
}
.cl-profiler-top.fullScreen {
display: none;
.operator-title {
padding: 0 15px;
font-size: 16px;
font-weight: bold;
}
.cl-profiler-bottom {
height: 55%;
padding-top: 10px;
.fullScreen {
.cl-profiler {
height: calc(100% - 21px);
overflow-y: auto;
width: 100%;
background: #fff;
padding: 0 16px;
overflow: hidden;
.custom-label {
max-width: calc(100% - 25px);
padding: 0;
vertical-align: middle;
}
.el-tabs {
height: 100%;
.el-tabs__header {
margin-bottom: 10px;
}
}
.el-tabs__content {
height: calc(100% - 46px);
}
.el-tab-pane {
height: 100%;
}
.cl-search-box {
float: right;
margin-top: 5px;
cursor: pointer;
margin-bottom: 10px;
margin-right: 20px;
}
}
.cl-profiler-bottom.fullScreen {
height: 100%;
}
.cpu-tab {
.cl-profiler-top {
height: calc(36% + 32px);
height: 45%;
.chart-title {
float: left;
font-weight: bold;
}
}
.cl-profiler-top.fullScreen {
display: none;
}
.cl-profiler-bottom {
height: 100%;
height: 55%;
padding-top: 10px;
.fullScreen {
float: right;
margin-top: 5px;
cursor: pointer;
}
}
}
.cl-profiler-echarts {
width: 100%;
height: calc(100% - 32px);
display: inline-block;
position: relative;
overflow: auto;
#cpu-echarts,
#core-echarts {
width: 100%;
.cl-profiler-bottom.fullScreen {
height: 100%;
min-width: 1300px;
min-height: 306px;
overflow: hidden;
}
}
.chart-radio-group {
float: right;
}
.el-radio-group {
.el-radio-button--small .el-radio-button__inner {
height: 30px;
width: 70px;
font-size: 14px;
line-height: 10px;
}
}
.cl-profiler-bar {
display: inline-block;
width: calc(100% - 400px);
vertical-align: top;
height: 100%;
padding: 20px;
}
.cl-profiler-table-type {
display: inline-block;
width: calc(100% - 400px);
vertical-align: top;
height: 100%;
}
.el-pagination {
margin: 7px 0;
float: right;
}
.details-data-list {
.el-table {
th {
padding: 10px 0;
border-top: 1px solid #ebeef5;
.cell {
border-left: 1px solid #d9d8dd;
height: 14px;
line-height: 14px;
}
.cpu-tab {
.cl-profiler-top {
height: calc(36% + 32px);
}
th:first-child {
.cell {
border-left: none;
}
.cl-profiler-bottom {
height: 100%;
}
th:nth-child(2),
td:nth-child(2) {
max-width: 30%;
}
.cl-profiler-echarts {
width: 100%;
height: calc(100% - 32px);
display: inline-block;
position: relative;
overflow: auto;
#cpu-echarts,
#core-echarts {
width: 100%;
height: 100%;
min-width: 1300px;
min-height: 306px;
overflow: hidden;
}
td {
padding: 8px 0;
}
.chart-radio-group {
float: right;
}
.el-radio-group {
.el-radio-button--small .el-radio-button__inner {
height: 30px;
width: 70px;
font-size: 14px;
line-height: 10px;
}
}
.el-table__row--level-0 td:first-child:after {
width: 20px;
height: 1px;
background: #ebeef5;
z-index: 11;
position: absolute;
left: 0;
bottom: -1px;
content: '';
display: block;
.cl-profiler-bar {
display: inline-block;
width: calc(100% - 400px);
vertical-align: top;
height: 100%;
padding: 20px;
}
.cl-profiler-table-type {
display: inline-block;
width: calc(100% - 400px);
vertical-align: top;
height: 100%;
}
.el-pagination {
margin: 7px 0;
float: right;
}
.el-table__row--level-1 {
td {
padding: 4px 0;
position: relative;
.details-data-list {
.el-table {
th {
padding: 10px 0;
border-top: 1px solid #ebeef5;
.cell {
border-left: 1px solid #d9d8dd;
height: 14px;
line-height: 14px;
}
}
th:first-child {
.cell {
border-left: none;
}
}
th:nth-child(2),
td:nth-child(2) {
max-width: 30%;
}
td {
padding: 8px 0;
}
}
td:first-child::before {
width: 42px;
background: #f0fdfd;
border-right: 2px #00a5a7 solid;
z-index: 10;
.el-table__row--level-0 td:first-child:after {
width: 20px;
height: 1px;
background: #ebeef5;
z-index: 11;
position: absolute;
left: 0;
top: -1px;
bottom: 0px;
bottom: -1px;
content: '';
display: block;
}
}
.el-table__row--level-1 {
td {
padding: 4px 0;
position: relative;
}
td:first-child::before {
width: 42px;
background: #f0fdfd;
border-right: 2px #00a5a7 solid;
z-index: 10;
position: absolute;
left: 0;
top: -1px;
bottom: 0px;
content: '';
display: block;
}
}

.el-table__row--level-1:first-child {
td:first-child::before {
bottom: 0;
.el-table__row--level-1:first-child {
td:first-child::before {
bottom: 0;
}
}
.el-dialog__title {
font-weight: bold;
}
.el-dialog__body {
max-height: 500px;
padding-top: 10px;
overflow: auto;
.details-data-title {
margin-bottom: 20px;
}
}
}
.el-dialog__title {
font-weight: bold;
.el-table__expanded-cell[class*='cell'] {
padding: 0;
}
.el-dialog__body {
max-height: 500px;
padding-top: 10px;
overflow: auto;
.details-data-title {
margin-bottom: 20px;
}
.expand-table {
position: relative;
padding-left: 44px;
}
}
.el-table__expanded-cell[class*='cell'] {
padding: 0;
}
.expand-table {
position: relative;
padding-left: 44px;
}
.expand-table::before {
content: '';
position: absolute;
left: 0;
top: 0;
height: 100%;
background: #f0fdfd;
width: 42px;
border-right: 2px #00a5a7 solid;
}
.el-radio-button:last-child .el-radio-button__inner,
.el-radio-button:first-child .el-radio-button__inner {
border-radius: 0;
}
.image-noData {
width: 100%;
height: 450px;
display: flex;
justify-content: center;
align-items: center;
flex-direction: column;
p {
font-size: 16px;
padding-top: 10px;
.expand-table::before {
content: '';
position: absolute;
left: 0;
top: 0;
height: 100%;
background: #f0fdfd;
width: 42px;
border-right: 2px #00a5a7 solid;
}
.el-radio-button:last-child .el-radio-button__inner,
.el-radio-button:first-child .el-radio-button__inner {
border-radius: 0;
}
.image-noData {
width: 100%;
height: 450px;
display: flex;
justify-content: center;
align-items: center;
flex-direction: column;
p {
font-size: 16px;
padding-top: 10px;
}
}
}
}


+ 2
- 1
mindinsight/ui/src/views/profiling/profiling.vue View File

@@ -65,7 +65,7 @@ import RequestService from '../../services/request-service';
export default {
data() {
return {
summaryPath: this.$route.query.summaryPath,
summaryPath: '',
tipsArrayList: [
'step_trace-iter_interval',
'minddata_pipeline-general',
@@ -101,6 +101,7 @@ export default {
this.curDashboardInfo.query.id = this.$route.query.id;
this.curDashboardInfo.query.dir = this.$route.query.dir;
this.curDashboardInfo.query.path = this.$route.query.path;
this.summaryPath = decodeURIComponent( this.$route.query.id);
this.getDeviceList();
} else {
this.curDashboardInfo.query.trainingJobId = '';


+ 1
- 3
mindinsight/ui/src/views/train-manage/compare-plate.vue View File

@@ -1208,9 +1208,7 @@ export default {
this.$store.commit('setIsReload', false);
this.isReloading = false;
}
if (error.response && error.response.data) {
this.clearAllData();
}
this.clearAllData();
},

/**


+ 118
- 13
mindinsight/ui/src/views/train-manage/summary-manage.vue View File

@@ -44,7 +44,8 @@ limitations under the License.
stripe
height="100%"
tooltip-effect="light"
class="list-el-table">
class="list-el-table"
ref="table">
<el-table-column width="50"
type=index
:label="$t('summaryManage.sorting')">
@@ -69,14 +70,15 @@ limitations under the License.
:label="$t('summaryManage.operation')"
width="240">
<template slot-scope="scope">
<el-button type="text"
@click.stop="goToTrainDashboard(scope.row)">
{{$t('summaryManage.viewDashboard')}} </el-button>
<el-button type="text"
class="operate-btn"
<span class="menu-item"
@contextmenu.prevent="rightClick(scope.row, $event, 0)"
@click.stop="goToTrainDashboard(scope.row)">
{{$t('summaryManage.viewDashboard')}} </span>
<span class="menu-item operate-btn"
v-if="scope.row.viewProfiler"
@contextmenu.prevent="rightClick(scope.row, $event, 1)"
@click.stop="goToProfiler(scope.row)">
{{$t('summaryManage.viewProfiler')}} </el-button>
{{$t('summaryManage.viewProfiler')}} </span>
<el-button type="text"
class="operate-btn"
disabled
@@ -99,6 +101,13 @@ limitations under the License.
</el-pagination>
</div>
</div>
<div id="contextMenu"
v-if="contextMenu.show"
:style="{left: contextMenu.left, top: contextMenu.top}">
<ul>
<li @click="doRightClick()">{{$t('summaryManage.openNewTab')}}</li>
</ul>
</div>
</div>
</template>

@@ -117,21 +126,50 @@ export default {
total: 0,
layout: 'total, prev, pager, next, jumper',
},
contextMenu: {
show: false,
left: '',
top: '',
data: null,
type: 0,
},
tableDom: null,
};
},
computed: {},
watch: {},
destroyed() {},
/**
 * Remove the listeners added in mounted() and the document handlers set in init()
 */
destroyed() {
window.removeEventListener('resize', this.closeMenu);
window.removeEventListener('mousewheel', this.closeMenu);
// tableDom is only assigned inside the delayed callback of mounted(), so it may still be null here.
if (this.tableDom) {
this.tableDom.removeEventListener('scroll', this.closeMenu);
}
// init() assigns these handlers directly on document, so they are reset the same way.
document.onclick = null;
document.onscroll = null;
},
activated() {},
mounted() {
document.title = `${this.$t('summaryManage.summaryList')}-MindInsight`;
this.$nextTick(() => {
this.init();
});
setTimeout(() => {
window.addEventListener('resize', this.closeMenu, false);
window.addEventListener('mousewheel', this.closeMenu, false);
this.tableDom = this.$refs.table.bodyWrapper;
this.tableDom.addEventListener('scroll', this.closeMenu, false);
}, 300);
},

methods: {
init() {
document.onclick = () => {
this.contextMenu.show = false;
};
document.onscroll = () => {
this.contextMenu.show = false;
};

const params = {
limit: this.pagination.pageSize,
offset: this.pagination.currentPage - 1,
@@ -187,34 +225,80 @@ export default {
* @param {Object} row select row
*/
goToTrainDashboard(row) {
this.contextMenu.show = false;
const trainId = encodeURIComponent(row.train_id);

const routeUrl = this.$router.resolve({
this.$router.push({
path: '/train-manage/training-dashboard',
query: {id: trainId},
});
window.open(routeUrl.href, '_blank');
},
/**
* go to Profiler
* @param {Object} row select row
*/
goToProfiler(row) {
this.contextMenu.show = false;
const profilerDir = encodeURIComponent(row.profiler_dir);
const trainId = encodeURIComponent(row.train_id);
const path = encodeURIComponent(row.relative_path);
const router = `/profiling${row.profiler_type === 'gpu' ? '-gpu' : ''}`;

const routeUrl = this.$router.resolve({
this.$router.push({
path: router,
query: {
dir: profilerDir,
id: trainId,
path: path,
summaryPath: row.train_id,
},
});
window.open(routeUrl.href, '_blank');
},

rightClick(row, event, type) {
const maxWidth = 175;
this.contextMenu.data = row;
this.contextMenu.type = type;
const width = document.getElementById('cl-summary-manage').clientWidth;
const left = Math.min(width - maxWidth, event.clientX + window.scrollX);
this.contextMenu.left = left + 'px';
this.contextMenu.top = event.clientY + window.scrollY + 'px';
this.contextMenu.show = true;
},

doRightClick(key) {
const row = this.contextMenu.data;
if (!row) {
return;
}
if (this.contextMenu.type) {
this.contextMenu.show = false;
const profilerDir = encodeURIComponent(row.profiler_dir);
const trainId = encodeURIComponent(row.train_id);
const path = encodeURIComponent(row.relative_path);
const router = `/profiling${row.profiler_type === 'gpu' ? '-gpu' : ''}`;

const routeUrl = this.$router.resolve({
path: router,
query: {
dir: profilerDir,
id: trainId,
path: path,
},
});
window.open(routeUrl.href, '_blank');
} else {
this.contextMenu.show = false;
const trainId = encodeURIComponent(row.train_id);

const routeUrl = this.$router.resolve({
path: '/train-manage/training-dashboard',
query: {id: trainId},
});
window.open(routeUrl.href, '_blank');
}
},
/**
 * Hide the context menu; registered as the resize, mousewheel and table scroll listener
 */
closeMenu() {
this.contextMenu.show = false;
},
},
components: {},
@@ -282,5 +366,26 @@ export default {
.operate-btn {
margin-left: 20px;
}
.menu-item {
color: #00a5a7;
cursor: pointer;
}
#contextMenu {
position: absolute;
min-width: 150px;
border: 1px solid #d4d4d4;
ul {
background-color: #f7faff;
border-radius: 2px;
li {
padding: 5px 18px;
cursor: pointer;
&:hover {
background-color: rgb(167, 167, 167);
color: white;
}
}
}
}
}
</style>

+ 4
- 4
mindinsight/wizard/README_CN.md View File

@@ -4,7 +4,7 @@

## 介绍

MindWizard是一款快速生成经典网络脚本的工具。工具根据用户选择,组合模型、超参、数据集等网络参数,自动生成目标网络脚本,生成的网络脚本可以在Ascend或GPU等环境上进行训练和推理
MindWizard是一款快速生成经典网络脚本的工具。工具根据用户选择,组合模型、超参、数据集等网络参数,自动生成目标网络脚本,生成的网络脚本可以在Ascend或GPU等环境上进行训练和评估

## 安装

@@ -33,7 +33,7 @@ optional arguments:

2. 请选择数据集(MNIST / Cifar10 / ImageNet / ...)

生成脚本后,用户可执行训练和推理,详细介绍可参考网络脚本工程中的README。
生成脚本后,用户可执行训练和评估,详细介绍可参考网络脚本工程中的README。

## 网络脚本工程结构

@@ -42,14 +42,14 @@ project
|- script
| |- run_standalone_train.sh # 单卡训练脚本
| |- run_distribute_train.sh # 多卡训练脚本
| |- run_eval.sh # 推理脚本
| |- run_eval.sh # 评估脚本
| |- ...
|- src
| |- config.py # 参数配置
| |- dataset.py # 数据集处理
| |- lenet.py/resnet.py/... # 网络定义
| |- ...
|- eval.py # 网络推理
|- eval.py # 网络评估
|- train.py # 网络训练
|- README.md
```


+ 1
- 0
mindinsight/wizard/base/source_file.py View File

@@ -45,6 +45,7 @@ class SourceFile:
fp.write(self.content)
try:
shutil.copymode(self.template_file_path, new_file_path)
os.chmod(new_file_path, stat.S_IRUSR | stat.S_IWUSR)
self.set_writeable(new_file_path)
if new_file_path.endswith('.sh'):
self.set_executable(new_file_path)


+ 0
- 1
mindinsight/wizard/conf/templates/network/alexnet/scripts/run_distribute_train.sh-tpl View File

@@ -67,7 +67,6 @@ do
rm -rf ./train_parallel$i
mkdir ./train_parallel$i
cp ../*.py ./train_parallel$i
cp *.sh ./train_parallel$i
cp -r ../src ./train_parallel$i
cd ./train_parallel$i || exit
echo "start training for rank $RANK_ID, device $DEVICE_ID"


+ 0
- 1
mindinsight/wizard/conf/templates/network/alexnet/scripts/run_distribute_train_gpu.sh-tpl View File

@@ -54,7 +54,6 @@ export RANK_SIZE=$DEVICE_NUM
rm -rf ./train_parallel
mkdir ./train_parallel
cp ../*.py ./train_parallel
cp *.sh ./train_parallel
cp -r ../src ./train_parallel
cd ./train_parallel || exit
echo "start training"


+ 0
- 1
mindinsight/wizard/conf/templates/network/alexnet/scripts/run_eval.sh-tpl View File

@@ -56,7 +56,6 @@ then
fi
mkdir ./eval
cp ../*.py ./eval
cp *.sh ./eval
cp -r ../src ./eval
cd ./eval || exit
env > env.log


+ 0
- 1
mindinsight/wizard/conf/templates/network/alexnet/scripts/run_eval_gpu.sh-tpl View File

@@ -57,7 +57,6 @@ then
fi
mkdir ./eval
cp ../*.py ./eval
cp *.sh ./eval
cp -r ../src ./eval
cd ./eval || exit
env > env.log


+ 0
- 1
mindinsight/wizard/conf/templates/network/alexnet/scripts/run_standalone_train.sh-tpl View File

@@ -59,7 +59,6 @@ then
fi
mkdir ./train
cp ../*.py ./train
cp *.sh ./train
cp -r ../src ./train
cd ./train || exit
echo "start training for device $DEVICE_ID"


+ 0
- 1
mindinsight/wizard/conf/templates/network/alexnet/scripts/run_standalone_train_gpu.sh-tpl View File

@@ -59,7 +59,6 @@ then
fi
mkdir ./train
cp ../*.py ./train
cp *.sh ./train
cp -r ../src ./train
cd ./train || exit
echo "start training"


+ 1
- 1
mindinsight/wizard/conf/templates/network/alexnet/src/config.py-tpl View File

@@ -28,7 +28,7 @@ cfg = edict({
'lr': 0.002,
"momentum": 0.9,
{% elif optimizer=='SGD' %}
'lr': 0.1,
'lr': 0.01,
{% else %}
'lr': 0.001,
{% endif %}


+ 0
- 1
mindinsight/wizard/conf/templates/network/lenet/scripts/run_distribute_train.sh-tpl View File

@@ -68,7 +68,6 @@ do
rm -rf ./train_parallel$i
mkdir ./train_parallel$i
cp ../*.py ./train_parallel$i
cp *.sh ./train_parallel$i
cp -r ../src ./train_parallel$i
cd ./train_parallel$i || exit
echo "start training for rank $RANK_ID, device $DEVICE_ID"


+ 0
- 1
mindinsight/wizard/conf/templates/network/lenet/scripts/run_distribute_train_gpu.sh-tpl View File

@@ -53,7 +53,6 @@ export RANK_SIZE=$DEVICE_NUM
rm -rf ./train_parallel
mkdir ./train_parallel
cp ../*.py ./train_parallel
cp *.sh ./train_parallel
cp -r ../src ./train_parallel
cd ./train_parallel || exit



+ 0
- 1
mindinsight/wizard/conf/templates/network/lenet/scripts/run_eval.sh-tpl View File

@@ -56,7 +56,6 @@ then
fi
mkdir ./eval
cp ../*.py ./eval
cp *.sh ./eval
cp -r ../src ./eval
cd ./eval || exit
env > env.log


+ 0
- 1
mindinsight/wizard/conf/templates/network/lenet/scripts/run_eval_gpu.sh-tpl View File

@@ -57,7 +57,6 @@ then
fi
mkdir ./eval
cp ../*.py ./eval
cp *.sh ./eval
cp -r ../src ./eval
cd ./eval || exit
env > env.log


+ 0
- 1
mindinsight/wizard/conf/templates/network/lenet/scripts/run_standalone_train.sh-tpl View File

@@ -60,7 +60,6 @@ then
fi
mkdir ./train
cp ../*.py ./train
cp *.sh ./train
cp -r ../src ./train
cd ./train || exit
echo "start training for device $DEVICE_ID"


+ 0
- 1
mindinsight/wizard/conf/templates/network/lenet/scripts/run_standalone_train_gpu.sh-tpl View File

@@ -61,7 +61,6 @@ then
fi
mkdir ./train
cp ../*.py ./train
cp *.sh ./train
cp -r ../src ./train
cd ./train || exit



+ 1
- 1
mindinsight/wizard/conf/templates/network/lenet/src/config.py-tpl View File

@@ -23,7 +23,7 @@ cfg = edict({
'lr': 0.01,
"momentum": 0.9,
{% elif optimizer=='SGD' %}
'lr': 0.1,
'lr': 0.01,
{% else %}
'lr': 0.001,
{% endif %}


+ 0
- 1
mindinsight/wizard/conf/templates/network/resnet50/scripts/run_distribute_train.sh-tpl View File

@@ -67,7 +67,6 @@ do
rm -rf ./train_parallel$i
mkdir ./train_parallel$i
cp ../*.py ./train_parallel$i
cp *.sh ./train_parallel$i
cp -r ../src ./train_parallel$i
cd ./train_parallel$i || exit
echo "start training for rank $RANK_ID, device $DEVICE_ID"


+ 0
- 1
mindinsight/wizard/conf/templates/network/resnet50/scripts/run_distribute_train_gpu.sh-tpl View File

@@ -54,7 +54,6 @@ export RANK_SIZE=$DEVICE_NUM
rm -rf ./train_parallel
mkdir ./train_parallel
cp ../*.py ./train_parallel
cp *.sh ./train_parallel
cp -r ../src ./train_parallel
cd ./train_parallel || exit



+ 0
- 1
mindinsight/wizard/conf/templates/network/resnet50/scripts/run_eval.sh-tpl View File

@@ -56,7 +56,6 @@ then
fi
mkdir ./eval
cp ../*.py ./eval
cp *.sh ./eval
cp -r ../src ./eval
cd ./eval || exit
env > env.log


+ 0
- 1
mindinsight/wizard/conf/templates/network/resnet50/scripts/run_eval_gpu.sh-tpl View File

@@ -57,7 +57,6 @@ then
fi
mkdir ./eval
cp ../*.py ./eval
cp *.sh ./eval
cp -r ../src ./eval
cd ./eval || exit
env > env.log


+ 0
- 1
mindinsight/wizard/conf/templates/network/resnet50/scripts/run_standalone_train.sh-tpl View File

@@ -59,7 +59,6 @@ then
fi
mkdir ./train
cp ../*.py ./train
cp *.sh ./train
cp -r ../src ./train
cd ./train || exit
echo "start training for device $DEVICE_ID"


+ 0
- 1
mindinsight/wizard/conf/templates/network/resnet50/scripts/run_standalone_train_gpu.sh-tpl View File

@@ -59,7 +59,6 @@ then
fi
mkdir ./train
cp ../*.py ./train
cp *.sh ./train
cp -r ../src ./train
cd ./train || exit
echo "start training"


+ 1
- 1
mindinsight/wizard/conf/templates/network/resnet50/src/config.py-tpl View File

@@ -30,7 +30,7 @@ cfg = ed({
"momentum": 0.9,
"lr": 0.01,
{% elif optimizer=='SGD' %}
'lr': 0.1,
'lr': 0.01,
{% else %}
'lr': 0.001,
{% endif %}


+ 1
- 0
mindinsight/wizard/create_project.py View File

@@ -91,6 +91,7 @@ class CreateProject(BaseCommand):
if not choice:
click.secho(textwrap.dedent("Network is required."), fg='red')

click.secho(textwrap.dedent("Your choice is %s." % network_type_choices[choice - 1]), fg='yellow')
return network_type_choices[choice - 1]

@staticmethod


Loading…
Cancel
Save