Browse Source

New features / Cluster dashboard and memory thermal diagram

pull/1328/head
weiyanxi 4 years ago
parent
commit
2c81f3ea7b
12 changed files with 1359 additions and 19 deletions
  1. +6
    -0
      mindinsight/ui/src/common/common-property.js
  2. +62
    -0
      mindinsight/ui/src/components/empty.vue
  3. +6
    -1
      mindinsight/ui/src/locales/en-us.json
  4. +6
    -1
      mindinsight/ui/src/locales/zh-cn.json
  5. +8
    -4
      mindinsight/ui/src/router.js
  6. +7
    -0
      mindinsight/ui/src/services/request-service.js
  7. +136
    -0
      mindinsight/ui/src/views/profiling/cluster-dashboard.vue
  8. +474
    -0
      mindinsight/ui/src/views/profiling/memory-heatmap.vue
  9. +264
    -0
      mindinsight/ui/src/views/profiling/performance-dashboard.vue
  10. +32
    -9
      mindinsight/ui/src/views/profiling/profiling-performance.vue
  11. +354
    -0
      mindinsight/ui/src/views/profiling/resource-dashboard-cluster.vue
  12. +4
    -4
      mindinsight/ui/src/views/train-manage/summary-manage.vue

+ 6
- 0
mindinsight/ui/src/common/common-property.js View File

@@ -145,4 +145,10 @@ export default {
'.Batch > polygon { stroke: #de504e; fill: #ffbcba; }' +
'.edge path { stroke: rgb(167, 167, 167); }' +
'.edge polygon { fill: rgb(167, 167, 167); stroke: rgb(167, 167, 167); }</style>',
clusterHeatmapDashboardColorArr: [
'#fff2d0', '#fbeeb2', '#f8eb94', '#c1e891', '#8ae58f', '#64d4ab', '#3fc3c8', '#36afc2', '#2d9cbc', '#1e74a0',
],
clusterHeatmapColorArr: [
'#fff2d0', '#f8eb94', '#8ae58f', '#3fc3c8', '#2d9cbc', '#104d85',
],
};

+ 62
- 0
mindinsight/ui/src/components/empty.vue View File

@@ -0,0 +1,62 @@
<!--
Copyright 2021 Huawei Technologies Co., Ltd.All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<template>
<div class="empty-container">
<img :src="require('@/assets/images/nodata.png')"/>
<span>{{ text }}</span>
</div>
</template>

<script>
// States accepted by the `state` prop. Each value doubles as the name of the
// data field that holds the matching translated message.
export const NO_DATA = 'noData';
export const EMPTY_DATA = 'emptyData';
export const LOADING_DATA = 'dataLoading';
export const COMING_SOON = 'stayTuned';
export default {
  props: {
    // One of the exported state constants; any other value renders no text
    state: {
      type: String,
      default: LOADING_DATA,
    },
  },
  data() {
    // Messages are translated once, when the component instance is created
    const noData = this.$t('public.noData');
    const emptyData = this.$t('public.emptyData');
    const dataLoading = this.$t('public.dataLoading');
    const stayTuned = this.$t('public.stayTuned');
    return {noData, emptyData, dataLoading, stayTuned};
  },
  computed: {
    /**
     * Message shown under the placeholder image
     * @return {String} Translated message of the current state, or '' if none
     */
    text() {
      return this[this.state] || '';
    },
  },
};
</script>

<style scoped>
.empty-container {
height: 100%;
width: 100%;
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
}
.empty-container img {
margin-bottom: 10px;
object-fit: none;
}
</style>

+ 6
- 1
mindinsight/ui/src/locales/en-us.json View File

@@ -539,9 +539,14 @@
},
"profilingCluster": {
"titleText": "Profiling - Cluster Overview",
"commTitle": "Communication - Cluster Overview",
"clusterView": "Cluster Overview",
"rankID": "Rank ID",
"timeTitle": "time(ms)"
"timeTitle": "time(ms)",
"performanceChartTitle": "Performance Analysis",
"memoryHeatMapTitle": "Memory Heat Map Analysis",
"deviceId": "Device",
"granuLarity": "Granularity"
},
"components": {
"summaryTitle": "Training Selection",


+ 6
- 1
mindinsight/ui/src/locales/zh-cn.json View File

@@ -538,9 +538,14 @@
},
"profilingCluster": {
"titleText": "性能分析 - 集群概览",
"commTitle": "通信信息 - 集群概览",
"clusterView": "集群概览",
"rankID": "逻辑卡号",
"timeTitle": "时间(ms)"
"timeTitle": "时间(ms)",
"performanceChartTitle": "性能分析",
"memoryHeatMapTitle": "内存热力图分析",
"deviceId": "设备",
"granuLarity": "粒度"
},
"components": {
"summaryTitle": "训练选择",


+ 8
- 4
mindinsight/ui/src/router.js View File

@@ -146,12 +146,16 @@ export default new Router({
],
},
{
path: '/profiling-cluster',
component: () => import('./views/profiling/profiling-cluster.vue'),
path: '/cluster-dashboard',
component: () => import('./views/profiling/cluster-dashboard.vue'),
},
{
path: '/profiling-gpu-cluster',
component: () => import('./views/profiling/profiling-cluster.vue'),
path: '/profiling-performance',
component: () => import('./views/profiling/profiling-performance.vue'),
},
{
path: '/memory-heatmap',
component: () => import('./views/profiling/memory-heatmap.vue'),
},
{
path: '/debugger',


+ 7
- 0
mindinsight/ui/src/services/request-service.js View File

@@ -525,4 +525,11 @@ export default {
url: 'v1/mindinsight/profile/cluster-step-trace-summary',
});
},
getClusterPeakMemory(params) {
return axios({
method: 'get',
params: params,
url: 'v1/mindinsight/profile/cluster-peak-memory',
});
},
};

+ 136
- 0
mindinsight/ui/src/views/profiling/cluster-dashboard.vue View File

@@ -0,0 +1,136 @@
<!--
Copyright 2021 Huawei Technologies Co., Ltd.All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<template>
<div class="cl-cluster-dashboard">
<div class="cluster-head">
<span class="cl-title-left">{{$t('profilingCluster.clusterView')}}</span>
<div class="path-message">
<span>{{$t('symbols.leftbracket')}}</span>
<span>{{$t('trainingDashboard.summaryDirPath')}}</span>
<span>{{summaryPath}}</span>
<span>{{$t('symbols.rightbracket')}}</span>
</div>
</div>
<div class="content-container">
<div class="tab-container">
<el-tabs v-model="tabData.activeName"
@tab-click="onTabClick">
<el-tab-pane v-for="pane in tabData.tabPanes"
:key="pane.label"
:label="pane.label"
:name="pane.name"></el-tab-pane>
</el-tabs>
</div>
<div class="item-container">
<PerformanceDashboard v-if="tabData.activeName === '0'"
:activeName="'0'"></PerformanceDashboard>
<ResourceDashboard v-else
:activeName="'1'"></ResourceDashboard>
</div>
</div>
</div>
</template>
<script>
import ResourceDashboard from './resource-dashboard-cluster';
import PerformanceDashboard from './performance-dashboard';
export default {
components: {
ResourceDashboard,
PerformanceDashboard,
},
props: {},
data() {
return {
summaryPath: this.$route.query.path, // Path of the current training job
trainingJobId: this.$route.query.id, // ID of the current training job
summaryDir: this.$route.query.dir, // Dir of the current training job
tabData: {
activeName: this.$route.query.activeName || '0',
tabPanes: [
{
name: '0',
label: this.$t('profiling.trainingPerformance'),
},
{
name: '1',
label: this.$t('profiling.resourceUtilization'),
},
],
}, // The data of tab
};
},
created() {
if (!this.trainingJobId) {
this.$message.error(this.$t('trainingDashboard.invalidId'));
document.title = `${this.$t('profilingCluster.clusterView')}-MindInsight`;
return;
}
document.title = `${this.summaryPath}-${this.$t(
'profilingCluster.clusterView',
)}-MindInsight`;
},
methods: {
/**
* The logic of click tab item
*/
onTabClick() {
const {dir, id, path} = this.$route.query;
this.$router.push({
path: '/cluster-dashboard',
query: {dir, id, path, activeName: this.tabData.activeName},
});
},
},
};
</script>
<style>
.cl-cluster-dashboard {
height: 100%;
background: #FFF;
}
.cl-cluster-dashboard .cluster-head {
height: 56px;
line-height: 56px;
display: inline-block;
}
.cl-cluster-dashboard .cluster-head .path-message {
display: inline-block;
line-height: 20px;
padding: 18px 0;
font-weight: bold;
margin-left: 5px;
}
.cl-cluster-dashboard .content-container {
height: calc(100% - 56px);
padding: 0 32px 24px 32px;
}
.cl-cluster-dashboard .content-container .item-container{
height: calc(100% - 47px);
}
.cl-cluster-dashboard .tab-container {
width: 100%;
padding-bottom: 5px;
}
.cl-cluster-dashboard .tab-container .el-tabs__item {
font-size: 14px;
line-height: 14px;
height: 27px;
}
.cl-cluster-dashboard .tab-container .el-tabs__item.is-active {
color: #00a5a7;
font-weight: bold;
}
</style>

+ 474
- 0
mindinsight/ui/src/views/profiling/memory-heatmap.vue View File

@@ -0,0 +1,474 @@
<!--
Copyright 2021 Huawei Technologies Co., Ltd.All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<template>
<div class="cl-memory-heatmap">
<div class="cl-cluster-title">
<div class="cl-cluster-title-left">{{$t("profilingCluster.memoryHeatMapTitle")}}</div>
<div class="path-message">
<span>{{$t('symbols.leftbracket')}}</span>
<span>{{$t('trainingDashboard.summaryDirPath')}}</span>
<span>{{summaryPath}}</span>
<span>{{$t('symbols.rightbracket')}}</span>
</div>
</div>
<div class="content">
<div class="legend-content">
<div class="legend-item"
v-for="(item, itemIndex) in colorArr"
:key="itemIndex">
<div class="color-item">
<div :style="{background: item.background}"></div>
</div>
<div class="value-item">{{`≥${item.range}`}}</div>
</div>
</div>
<p>{{$t('profilingCluster.granuLarity')+$t('symbols.colon')}}</p>
<el-select v-model="granuLarity"
class="select-granuLarity"
@change="getColorValue">
<el-option v-for="(item, index) in granuLarityList"
:key="index"
:value="item.label"
:label="item.label">
</el-option>
</el-select>
</div>
<div class="heatmap-content">
<div class="heatmap-item"
v-for="(item, itemIndex) in memoryHeatmapDataList"
:key="itemIndex"
:class="{'mt0': itemIndex < colNum}">
<div class="detail-content">
<div class="device-item"
v-for="(deviceItem, deviceItemIndex) in item.data"
:key="deviceItemIndex">
<div class="color-item">
<el-tooltip placement="top">
<div slot="content">
<div>
{{$t('profilingCluster.rankID') + $t('symbols.colon') + deviceItem.rankId}}
</div>
</div>
<div :style="{background:deviceItem.background}"
@click="jumpToMemory(deviceItem, item.hostIp)"></div>
</el-tooltip>
</div>
<div class="info-item">
{{$t('profilingCluster.deviceId') + deviceItem.deviceId}}
</div>
</div>
</div>
<div class="info-content">
{{item.hostIp}}
</div>
</div>
</div>
<!-- outer Page -->
<div class="pagination-content">
</div>
<img src="@/assets/images/close-page.png"
class="cl-cluster-close"
@click="backToDashboard">
</div>
</template>
<script>
import RequestService from '../../services/request-service';
import Color from '../../common/common-property';
export default {
props: {},
data() {
return {
summaryPath: this.$route.query.path,
trainingJobId: this.$route.query.id, // ID of the current training job
summaryDir: this.$route.query.dir,
memoryHeatmapInitOver: false, // init Heat map
memoryHeatmapDataList: [], // Heat map data
colNum: 4, // column number
granuLarityList: [], // Array of granularity
granuLarity: '0.1', // granuLarity value
colorArr: [], // Array of color index
};
},
watch: {},
computed: {},
created() {
this.granuLarityList = [
{label: '0.1', value: '0.1'},
{label: '0.05', value: '0.05'},
{label: '0.02', value: '0.02'},
];
this.getColorValue();
},
mounted() {
if (this.trainingJobId) {
document.title = `${this.summaryPath}-${this.$t('profilingCluster.memoryHeatMapTitle')}-MindInsight`;
this.getHeatMapData();
} else {
document.title = `${this.$t('profilingCluster.memoryHeatMapTitle')}-MindInsight`;
this.trainingJobId = '';
this.memoryHeatmapInitOver = true;
this.$message.error(this.$t('trainingDashboard.invalidId'));
}
},
methods: {
/**
* Get the color value
*/
getColorValue() {
this.colorArr = [];
this.index = 0;
const colorDepth = Color.clusterHeatmapColorArr;
const colorLength = 1 / this.granuLarity / (colorDepth.length - 1);
// 1: Used to calculate decimal places length
const fixedLength = this.granuLarity.length - (this.granuLarity.indexOf('.') + 1);
for (let i = 0; i < colorDepth.length - 1; i++) {
for (let j = 0; j < colorLength; j++) {
this.colorArr.push({
background: this.getGrientColor(colorDepth[i], colorDepth[i + 1], colorLength)[j],
range: (this.index * this.granuLarity).toFixed(fixedLength),
});
this.index++;
}
}
if (this.memoryHeatmapDataList.length) {
this.changeBackground();
}
},
/**
* Obtain heat map data
*/
getHeatMapData() {
const params = {
train_id: this.trainingJobId,
};
RequestService.getClusterPeakMemory(params).then((res) => {
if (!res || !res.data) {
this.memoryHeatmapInitOver = true;
return;
}
const resData = res.data;
const heatmapDataDic = {};
const HeatmapDataArr = [];
// merge host ip
resData.forEach((heatmap) => {
let dataIndex = heatmapDataDic[heatmap.host_ip];
if (isNaN(dataIndex)) {
dataIndex = HeatmapDataArr.length;
heatmapDataDic[heatmap.host_ip] = dataIndex;
HeatmapDataArr.push({
hostIp: heatmap.host_ip,
data: [],
});
}
HeatmapDataArr[dataIndex].data.push({
deviceId: heatmap.device_id,
rankId: heatmap.rank_id,
peakMem: heatmap.peak_mem,
capacity: heatmap.capacity,
value: heatmap.peak_mem / heatmap.capacity,
background: '',
});
});
// sort host ip by device id
this.memoryHeatmapDataList = this.sortByDeviceId(HeatmapDataArr);
this.changeBackground();
});
},
/**
* Calculate gradient color
* @param {String} startColor
* @param {String} endColor
* @param {Number} step
* @return {Array} Array of gradient color
*/
getGrientColor(startColor, endColor, step) {
const startRgb = this.formatColor(startColor);
const endRgb = this.formatColor(endColor);
const gapRgbR = (endRgb[0] - startRgb[0]) / step;
const gapRgbG = (endRgb[1] - startRgb[1]) / step;
const gapRgbB = (endRgb[2] - startRgb[2]) / step;
const colorResult = [];
for (let i = 0; i < step; i++) {
const sR = parseInt(gapRgbR * i + startRgb[0]);
const sG = parseInt(gapRgbG * i + startRgb[1]);
const sB = parseInt(gapRgbB * i + startRgb[2]);
const hex = this.formatColorToHex(`rgb(${sR},${sG},${sB})`);
colorResult.push(hex);
}
return colorResult;
},
/**
* Converts a color string to recognizable format
* @param {String} str Color string
* @return {Array} Value of RGB
*/
formatColor(str) {
if (!str) {
return;
}
const colorReg = /^([0-9a-fA-F]{3}|[0-9a-fA-F]{6})$/;
let colorStr = str.toLowerCase().slice(1);
if (colorReg.test(colorStr)) {
let colorStrNew = '';
if (colorStr.length === 3) {
for (let i = 0; i < 3; i++) {
colorStrNew += colorStrNew.slice(i, i + 1).concat(colorStrNew.slice(i, i + 1));
}
colorStr = colorStrNew;
}
const colorFormat = [];
for (let i = 0; i < 6; i += 2) {
colorFormat.push(parseInt(`0x${colorStr.slice(i, i + 2)}`));
}
return colorFormat;
} else {
return colorStr;
}
},
/**
* Converts rgb color string to hex
* @param {String} rgb Rgb color
* @return {String} Hex color
*/
formatColorToHex(rgb) {
const regRgb = /^(rgb|RGB)/g;
if (regRgb.test(rgb)) {
const colorSplit = rgb.replace(/(?:(|)|rgb|RGB)*/g, '').split(',');
let hexStr = '';
for (let i = 0; i < colorSplit.length; i++) {
let hexItem = Number(colorSplit[i]).toString(16);
hexItem = hexItem < 10 ? `0${hexItem}` : hexItem;
if (hexItem === '0') {
hexItem += hexItem;
}
hexStr += hexItem;
}
if (hexStr.length !== 6) {
hexStr = rgb;
}
return hexStr;
}
},
/**
* sort host ip by device id
* @param {Array} oriData Heat map data
* @return {Array} Heat map data
*/
sortByDeviceId(oriData) {
if (!oriData) {
return [];
}
oriData.forEach((hostData) => {
hostData.data.sort((a, b) => {
return a.deviceId - b.deviceId;
});
});
return oriData;
},
/**
* page turn memory
* @param {Object} item Jump parameters
* @param {String} hostIP host ip
*/
jumpToMemory(item, hostIP) {
this.$router.push({
path: '/profiling/memory-detail',
query: {
dir: this.summaryDir,
id: this.trainingJobId,
path: `${this.summaryPath}/cluster_profiler/${hostIP}`,
cardNum: item.deviceId,
deviceid: item.deviceId,
activeName: this.$route.query.activeName,
},
});
},
/**
* Change Color
*/
changeBackground() {
// Factor used to avoid JS floating point number question
const factor = 100;
this.memoryHeatmapDataList.forEach((data) => {
data.data.forEach((item) => {
const colorIndex = Math.floor((item.value * factor) / (+this.granuLarity * factor));
item.background = this.colorArr[colorIndex].background;
});
});
},
/**
* Back cluster
*/
backToDashboard() {
this.$router.push({
path: '/cluster-dashboard',
query: {
dir: this.summaryDir,
id: this.trainingJobId,
path: this.summaryPath,
activeName: this.$route.query.activeName,
},
});
},
},
};
</script>
<style scoped>
.cl-memory-heatmap {
height: 100%;
width: 100%;
padding: 0 32px 24px 32px;
background: #fff;
}
.cl-memory-heatmap .cl-cluster-title {
height: 56px;
line-height: 56px;
position: relative;
flex-shrink: 0;
}
.cl-memory-heatmap .cl-cluster-title .cl-cluster-title-left {
display: inline-block;
font-size: 20px;
font-weight: bold;
left: 0;
}
.cl-memory-heatmap .cl-cluster-title .path-message {
display: inline-block;
line-height: 20px;
padding: 18px 0;
font-weight: bold;
margin-left: 5px;
}
.cl-memory-heatmap .cl-cluster-close {
object-fit: none;
position: absolute;
cursor: pointer;
top: 82px;
right: 24px;
}
.cl-memory-heatmap .content {
position: absolute;
display: flex;
margin-top: -5px;
width: 150px;
right: 35px;
line-height: 56px;
}
.cl-memory-heatmap .content p {
line-height: 56px;
}
.cl-memory-heatmap .content .select-granuLarity {
width: 104px;
}
.cl-memory-heatmap .content {
position: relative;
margin-left: 30px;
width: 100%;
margin-top: 3px;
border-bottom: solid 1px #e6ebf5;
padding: 8px 0px;
}
.cl-memory-heatmap .content .legend-content {
position: relative;
margin-top: 3px;
width: calc(100% - 150px);
display: flex;
flex-wrap: wrap;
justify-content: flex-start;
}
.content .legend-content .legend-item {
width: 26px;
height: 56px;
padding: 5px 0;
margin-left: 12px;
}
.content .legend-content .legend-item .color-item {
width: 100%;
margin-left: 3px;
padding: 0 1px;
height: calc(100% - 20px);
}
.content .legend-content .legend-item .color-item div {
position: relative;
margin-top: 5px;
width: 100%;
height: 100%;
border: solid 1px #e6ebf5;
}
.content .legend-content .legend-item .value-item {
margin-top: -21px;
}
.cl-memory-heatmap .heatmap-content {
height: calc(100% - 132px);
overflow-y: auto;
display: flex;
flex-wrap: wrap;
margin-top: 20px;
}
.cl-memory-heatmap .heatmap-content .heatmap-item {
width: 25%;
height: 260px;
padding: 0 10px;
margin-top: 20px;
flex-shrink: 0;
}
.cl-memory-heatmap .heatmap-content .heatmap-item.mt0 {
margin-top: 0;
}
.cl-memory-heatmap .heatmap-content .heatmap-item .detail-content {
height: calc(100% - 28px);
background: #e6ebf5;
display: flex;
flex-wrap: wrap;
overflow-y: auto;
padding-bottom: 10px;
border-radius: 6px;
}
.heatmap-content .heatmap-item .detail-content .device-item {
flex-shrink: 0;
width: 25%;
height: 50%;
padding: 5px;
}
.heatmap-item .detail-content .device-item .color-item {
width: 100%;
height: calc(100% - 23px);
padding: 0 10px;
display: block;
position: relative;
}
.heatmap-item .detail-content .device-item .color-item div {
max-height: 100%;
height: 50px;
width: 24px;
bottom: 0;
left: 50%;
margin-left: -12px;
position: absolute;
cursor: pointer;
}
.heatmap-item .detail-content .device-item .info-item {
margin-top: 4px;
text-align: center;
}
.cl-memory-heatmap .heatmap-content .heatmap-item .info-content {
margin-top: 10px;
font-size: 16px;
color: #333;
font-weight: 600;
text-align: center;
}
</style>

+ 264
- 0
mindinsight/ui/src/views/profiling/performance-dashboard.vue View File

@@ -0,0 +1,264 @@
<!--
Copyright 2021 Huawei Technologies Co., Ltd.All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<template>
<div class="performance-dashboard">
<div class="container">
<div class="header" @click="jump('performance', performanceState === normalState)">
<span class="title">{{ performanceChart.title }}</span>
<span :class="{
'jump': true,
'is-effective': performanceState === normalState
}">
{{ $t('profiling.viewDetail') }}
<i class="el-icon-d-arrow-right"></i>
</span>
</div>
<div class="content" ref="performance">
<empty :state="performanceState"></empty>
</div>
</div>
</div>
</template>

<script>
import echarts from 'echarts';
import RequestService from '../../services/request-service';
import empty, {NO_DATA, LOADING_DATA} from '../../components/empty';
export default {
props: {
// Tab name supplied by the parent; jump() passes it along in the route query
activeName: String,
},
components: {
// Placeholder rendered inside the chart container, driven by performanceState
empty,
},
data() {
return {
trainInfo: {
id: this.$route.query.id,
path: this.$route.query.path,
dir: this.$route.query.dir,
}, // Info of current training job
resizeTimer: null, // Timer ID of Debounce of resize event callback
performanceState: LOADING_DATA, // State of performance window
performanceChart: {
dom: null,
instance: null,
data: null,
dimensions: [
this.$t('profilingCluster.rankID'),
this.$t('profiling.iterationGapTime'),
this.$t('profiling.fpBpTime'),
this.$t('profiling.tailTime'),
],
title: this.$t('profilingCluster.performanceChartTitle'),
}, // Chart object of performance window
normalState: 'normal', // Normal page state
};
},
mounted() {
this.performanceChart.dom = this.$refs.performance ? this.$refs.performance : null;
this.queryPerformanceInfo().then((state) => {
if (state) this.initChart(this.performanceChart);
});
window.addEventListener('resize', this.resizeCallBack);
},
methods: {
/**
* The logic of callback of resize event
*/
resizeCallBack() {
if (this.resizeTimer) {
clearTimeout(this.resizeTimer);
}
this.resizeTimer = setTimeout(() => {
if (this.performanceChart.dom && this.performanceChart.instance) {
this.performanceChart.instance.resize();
}
}, 100); // 100: Delay of debounce of callback
},
/**
* The logic of click details
* @param {string} path
* @param {boolean} effective
*/
jump(path, effective) {
if (!effective) return;
this.$router.push({
path: `profiling-${path}`,
query: Object.assign({
activeName: this.activeName,
}, this.trainInfo),
});
},
/**
* The logic of query performance info
* @return {Promise}
*/
queryPerformanceInfo() {
return new Promise((resolve) => {
const params = {};
params.params = {
train_id: this.trainInfo.id,
};
RequestService.getClusterInfo(params)
.then((res) => {
// eslint-disable-next-line camelcase
if (res?.data?.step_trace.length > 0) {
const chartData = [];
res.data.step_trace.forEach((item)=>{
const chartItem = [item.rank_id].concat(item.step_trace_info);
chartData.push(chartItem);
});
this.performanceChart.data = chartData;
this.performanceState = this.normalState;
resolve(true);
} else {
this.performanceState = NO_DATA;
}
})
.catch((e) => {
this.performanceState = NO_DATA;
resolve(false);
});
});
},
/**
* The logic of init echart
* @param {Object} chart
*/
initChart(chart) {
if (!chart.dom) return;
if (!chart.instance) {
chart.instance = echarts.init(chart.dom);
}
chart.instance.setOption({
tooltip: {
trigger: 'axis',
axisPointer: {
type: 'shadow',
},
},
legend: {
right: 70,
top: 15,
data: '',
},
grid: {
top: 60,
left: 80,
right: 80,
},
dataset: {
dimensions: chart.dimensions,
source: chart.data,
},
xAxis: {
name: this.$t('profilingCluster.rankID'),
nameTextStyle: {
align: 'left',
padding: [0, 5],
color: '#9EA4B3',
},
type: 'category',
axisLine: {
lineStyle: {
color: '#E6EBF5',
width: 2,
},
},
axisLabel: {
color: '#9EA4B3',
},
},
yAxis: {
name: this.$t('profilingCluster.timeTitle'),
nameGap: 20,
nameTextStyle: {
align: 'right',
padding: [0, 5],
color: '#9EA4B3',
},
axisLine: {
lineStyle: {
color: '#E6EBF5',
width: 2,
},
},
axisLabel: {
color: '#9EA4B3',
},
splitLine: {
lineStyle: {
color: ['#E6EBF5'],
width: 1,
type: 'dashed',
},
},
},
series: new Array(chart.dimensions.length - 1).fill(
{type: 'bar', barWidth: 8},
),
});
},
},
beforeDestroy() {
if (this.resizeTimer) {
clearTimeout(this.resizeTimer);
}
window.removeEventListener('resize', this.resizeCallBack);
},
};
</script>

<style scoped>
.performance-dashboard {
display: grid;
grid-template-rows: 100%;
grid-template-columns: 100%;
height: 100%;
row-gap: 20px;
}
.performance-dashboard .container {
width: 100%;
height: 100%;
border: 1px solid #d9d9d9;
border-radius: 4px;
padding: 15px;
position: relative;
}
.performance-dashboard .content {
width: 100%;
height: calc(100% - 24px);
}
.performance-dashboard .header {
width: 100%;
height: 24px;
display: flex;
align-items: center;
justify-content: space-between;
}
.performance-dashboard .header .title {
font-size: 18px;
font-weight: 700;
}
.performance-dashboard .header .jump {
cursor: pointer;
color: #b8b8b8;
}
.performance-dashboard .header .is-effective {
color: #00a5a7;
}
</style>

mindinsight/ui/src/views/profiling/profiling-cluster.vue → mindinsight/ui/src/views/profiling/profiling-performance.vue View File

@@ -22,7 +22,7 @@ limitations under the License.
<div class="path-message">
<span>{{$t('symbols.leftbracket')}}</span>
<span>{{$t('trainingDashboard.summaryDirPath')}}</span>
<span>{{summaryPath}}</span>
<span>{{trainInfo.path}}</span>
<span>{{$t('symbols.rightbracket')}}</span>
</div>
</div>
@@ -94,6 +94,10 @@ limitations under the License.
</div>
</div>

<img src="@/assets/images/close-page.png"
class="cl-cluster-close"
@click="backToDashboard">

<div class="no-data-img"
v-show="!chartData.length">
<div>
@@ -111,8 +115,12 @@ import RequestService from '../../services/request-service';
export default {
data() {
return {
summaryPath: '',
trainingJobId: this.$route.query.id, // ID of the current training job
trainInfo: {
id: this.$route.query.id,
path: this.$route.query.path,
dir: this.$route.query.dir,
},
activeName: this.$route.query.activeName,
chartObj: null, // chart obj
chartData: [], // chart data
chartOption: { // chart option
@@ -211,14 +219,14 @@ export default {
};
},
mounted() {
if (!this.trainingJobId) {
if (!this.trainInfo.id) {
this.$message.error(this.$t('trainingDashboard.invalidId'));
document.title = `${this.$t('profilingCluster.clusterView')}-MindInsight`;
this.initOver = true;
return;
}
this.summaryPath = decodeURIComponent(this.trainingJobId);
document.title = `${this.summaryPath}-${this.$t(
// const summaryPath = decodeURIComponent(this.trainInfo.path);
document.title = `${this.trainInfo.path}-${this.$t(
'profilingCluster.clusterView',
)}-MindInsight`;

@@ -237,6 +245,14 @@ export default {
}
},
methods: {
backToDashboard() {
this.$router.push({
path: 'cluster-dashboard',
query: Object.assign({
activeName: this.activeName,
}, this.trainInfo),
});
},
/**
* initialize
* @param {Boolean} isInit whether get all data
@@ -246,7 +262,7 @@ export default {
queryStepTraceInfo(isInit, isSort) {
const params = {};
params.params = {
train_id: this.trainingJobId,
train_id: this.trainInfo.id,
};
params.body = {
sort_condition: this.sort_condition,
@@ -366,9 +382,9 @@ export default {
const routeUrl = this.$router.resolve({
path: path,
query: {
id: this.trainingJobId + '/cluster_profiler/' + row.host_ip,
id: this.trainInfo.id + '/cluster_profiler/' + row.host_ip,
dir: row.profiler_dir,
path: this.trainingJobId + '/cluster_profiler/' + row.host_ip,
path: this.trainInfo.path + '/cluster_profiler/' + row.host_ip,
deviceid: row.device_id.toString(),
},
});
@@ -437,6 +453,13 @@ export default {
flex-direction: column;
padding: 0 32px 24px 32px;
}
.cl-cluster .cl-cluster-close {
object-fit: none;
position: absolute;
cursor: pointer;
top: 36px;
right: 24px;
}
.cl-cluster .no-data-img {
background: #fff;
text-align: center;

+ 354
- 0
mindinsight/ui/src/views/profiling/resource-dashboard-cluster.vue View File

@@ -0,0 +1,354 @@
<!--
Copyright 2021 Huawei Technologies Co., Ltd.All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<template>
<div class="cl-memory-heatmap-dasnhoard">
<div class="dashboard-item">
<div class="title-item">
<div class="title-text">
{{$t('profilingCluster.memoryHeatMapTitle')}}
</div>
<div class="detail-link"
:class="{disabled:!memoryHeatmapInitOver || !memoryHeatmapDataList.length}">
<button :disabled="!memoryHeatmapInitOver || !memoryHeatmapDataList.length"
@click="jumpToMemoryHeatmap">
{{$t('profiling.viewDetail')}}
<i class="el-icon-d-arrow-right"></i>
</button>
</div>
</div>
<div class="content-item">
<div class="noData-content"
v-show="!memoryHeatmapInitOver || !memoryHeatmapDataList.length">
<div>
<img :src="require('@/assets/images/nodata.png')" />
</div>
<div v-if="memoryHeatmapInitOver && !memoryHeatmapDataList.length"
class="noData-text">{{$t("public.noData")}}</div>
<div v-else
class="noData-text">{{$t("public.dataLoading")}}</div>
</div>
<div class="dashboard-chart-content"
v-show="memoryHeatmapDataList.length && memoryHeatmapInitOver">
<div class="legend-content">
<div class="legend-item"
v-for="(item, itemIndex) in legendArr"
:key="itemIndex">
<div class="color-item">
<div :style="{backgroundColor: item.backgroundColor}"></div>
</div>
<div class="value-item">{{item.info}}</div>
</div>
</div>
<div class="heatmap-content">
<div class="heatmap-item"
v-for="(item, itemIndex) in memoryHeatmapDataList"
:key="itemIndex"
:class="{'mt0': itemIndex < colNum}">
<div class="detail-content">
<div class="device-item"
v-for="(deviceItem, deviceItemIndex) in item.data"
:key="deviceItemIndex">
<div class="color-item">
<el-tooltip placement="top">
<div slot="content">
<div>
{{$t('profilingCluster.rankID') + $t('symbols.colon') + deviceItem.rankId}}
</div>
</div>
<div :style="{backgroundColor: deviceItem.backgroundColor}"></div>
</el-tooltip>
</div>
<div class="info-item">
{{$t('profilingCluster.deviceId') + deviceItem.deviceId}}
</div>
</div>
</div>
<div class="info-content">
{{item.hostIp}}
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</template>
<script>
import RequestService from '../../services/request-service';
import Color from '../../common/common-property';
export default {
props: {
// Tab name supplied by the parent; jumpToMemoryHeatmap() forwards it in the route query
activeName: {
type: String,
default: '',
},
},
data() {
return {
summaryPath: this.$route.query.path, // Path of the current training job
trainingJobId: this.$route.query.id, // ID of the current training job
summaryDir: this.$route.query.dir, // Dir of the current training job
memoryHeatmapInitOver: false, // The page state
memoryHeatmapDataList: [], // The heatmap daata
legendArr: [], // Legend
legendArrLength: 10, // Length of legend
colNum: 4, // Column num of heatmap
granularity: 0.1, // Granularity
};
},
mounted() {
// Build the legend first: the heat map response handler looks colours up in legendArr
this.initLegendArr();
this.getHeatMapData();
},
methods: {
/**
* The logic of jump to heatmap page
*/
jumpToMemoryHeatmap() {
this.$router.push({
path: '/memory-heatmap',
query: {
dir: this.summaryDir,
id: this.trainingJobId,
path: this.summaryPath,
activeName: this.activeName,
},
});
},
/**
* The logic of init heatmap legend
*/
initLegendArr() {
const tempArr = [];
const colorArr = Color.clusterHeatmapDashboardColorArr;
for (let i = 0; i < colorArr.length; i++) {
tempArr.push({
backgroundColor: colorArr[i],
info: `≥0.${i}`,
});
}
this.legendArr = tempArr;
},
/**
* The logic of get heatmap data
*/
getHeatMapData() {
const params = {
train_id: this.trainingJobId,
};
RequestService.getClusterPeakMemory(params).then((res) => {
if (!res || !res.data) {
this.memoryHeatmapInitOver = true;
return;
}
const resData = res.data;
const heatmapDataDic = {};
const HeatmapDataArr = [];
resData.forEach((heatmap) => {
let dataIndex = heatmapDataDic[heatmap.host_ip];
if (isNaN(dataIndex)) {
dataIndex = HeatmapDataArr.length;
heatmapDataDic[heatmap.host_ip] = dataIndex;
HeatmapDataArr.push({
hostIp: heatmap.host_ip,
data: [],
});
}
// Factor used to avoid JS floating point number question
const factor = 100;
const index = Math.floor(((heatmap.peak_mem / heatmap.capacity) * factor) / (this.granularity * factor));
HeatmapDataArr[dataIndex].data.push({
deviceId: heatmap.device_id,
rankId: heatmap.rank_id,
peakMem: heatmap.peak_mem,
capacity: heatmap.capacity,
backgroundColor: this.legendArr[index].backgroundColor,
});
});
this.memoryHeatmapDataList = this.sortByDeviceId(HeatmapDataArr);
this.memoryHeatmapInitOver = true;
});
},
/**
* The logic of sort data by device ID
* @param {Array} oriData
* @return {Array}
*/
sortByDeviceId(oriData) {
if (!oriData) {
return [];
}
oriData.forEach((hostData) => {
hostData.data.sort((a, b) => {
return a.deviceId - b.deviceId;
});
});
return oriData;
},
},
};
</script>
<style scoped>
/*
Root container of the memory heatmap dashboard: fills its parent.
NOTE(review): "dasnhoard" looks like a misspelling of "dashboard". The class is
presumably set on the template root (not visible in this chunk), so rename the
selectors and the template together or not at all.
*/
.cl-memory-heatmap-dasnhoard {
height: 100%;
width: 100%;
}
/* Bordered card that holds the title row and the content area */
.cl-memory-heatmap-dasnhoard .dashboard-item {
width: 100%;
height: 100%;
padding: 15px;
border: solid 1px #d9d9d9;
border-radius: 4px;
min-height: 284px;
}
/* Title row: the title text takes the remaining width, the detail link sits next to it */
.cl-memory-heatmap-dasnhoard .dashboard-item .title-item {
display: flex;
height: 24px;
}
.cl-memory-heatmap-dasnhoard .dashboard-item .title-item .title-text {
flex: 1;
font-size: 18px;
font-weight: bold;
line-height: 24px;
}
.cl-memory-heatmap-dasnhoard .dashboard-item .title-item .detail-link {
cursor: pointer;
font-size: 12px;
height: 18px;
line-height: 12px;
padding-top: 2px;
}
.cl-memory-heatmap-dasnhoard .dashboard-item .title-item .detail-link a {
color: #00a5a7;
padding-right: 6px;
}
.cl-memory-heatmap-dasnhoard .dashboard-item .title-item .detail-link button {
color: #00a5a7;
border: none;
background-color: #fff;
cursor: pointer;
}
/* Disabled state of the detail link: greyed out with a not-allowed cursor */
.cl-memory-heatmap-dasnhoard .dashboard-item .title-item .detail-link.disabled button {
color: #c0c4cc;
cursor: not-allowed;
}
/* Content area: 44px = 24px title row height + 20px top margin of this area */
.cl-memory-heatmap-dasnhoard .dashboard-item .content-item {
height: calc(100% - 44px);
margin-top: 20px;
}
.cl-memory-heatmap-dasnhoard .dashboard-item .content-item .dashboard-chart-content {
width: 100%;
height: 100%;
}
.cl-memory-heatmap-dasnhoard .margin-item {
margin-top: 20px;
}
/* Empty-state placeholder: children are stacked in a column and centered on both axes */
.cl-memory-heatmap-dasnhoard .noData-content {
width: 100%;
height: 100%;
display: flex;
justify-content: center;
align-items: center;
flex-direction: column;
}
.cl-memory-heatmap-dasnhoard .noData-content p,
.cl-memory-heatmap-dasnhoard .noData-content .noData-text {
font-size: 16px;
}
/* Legend strip: items are aligned to the right edge, separated from the heatmap by a bottom border */
.legend-content {
height: 56px;
border-bottom: solid 1px #e6ebf5;
display: flex;
justify-content: flex-end;
}
.legend-content .legend-item {
width: 26px;
height: 100%;
padding: 5px 0;
margin-left: 10px;
}
/* Color swatch wrapper; the 20px subtracted from the height is presumably reserved for the value label below */
.legend-content .legend-item .color-item {
width: 100%;
padding: 0 6px;
height: calc(100% - 20px);
}
.legend-content .legend-item .color-item div {
width: 100%;
height: 100%;
border: solid 1px #e6ebf5;
}
.legend-content .legend-item .value-item {
margin-top: 4px;
}
/* Heatmap area: wrapping flex layout of host cards with vertical scrolling */
.heatmap-content {
height: calc(100% - 132px);
overflow-y: auto;
display: flex;
flex-wrap: wrap;
margin-top: 20px;
}
/* One card per host; width 25% gives four cards per row, matching colNum (4) in the component data */
.heatmap-content .heatmap-item {
width: 25%;
height: 260px;
padding: 0 10px;
margin-top: 20px;
flex-shrink: 0;
}
/* The template applies mt0 to items whose index is below colNum, i.e. the first row has no top margin */
.heatmap-content .heatmap-item.mt0 {
margin-top: 0;
}
/* Device grid of a host card; 28px of the card height is left for the host label below */
.heatmap-content .heatmap-item .detail-content {
height: calc(100% - 28px);
background: #e6ebf5;
display: flex;
flex-wrap: wrap;
overflow-y: auto;
padding-bottom: 10px;
}
/* Device cell: 25% x 50% yields four columns and two visible rows, further rows scroll */
.heatmap-content .heatmap-item .detail-content .device-item {
flex-shrink: 0;
width: 25%;
height: 50%;
padding: 5px;
}
/* Positioning context for the colored bar of a device */
.heatmap-item .detail-content .device-item .color-item {
width: 100%;
height: calc(100% - 23px);
padding: 0 10px;
display: block;
position: relative;
}
/* Colored bar: 24px wide, anchored to the bottom and centered horizontally (left 50% minus half its width) */
.heatmap-item .detail-content .device-item .color-item div {
max-height: 100%;
height: 50px;
width: 24px;
bottom: 0;
left: 50%;
margin-left: -12px;
position: absolute;
}
/* Device ID label under the colored bar */
.heatmap-item .detail-content .device-item .info-item {
margin-top: 4px;
text-align: center;
}
/* Host IP label under the device grid */
.heatmap-content .heatmap-item .info-content {
margin-top: 10px;
font-size: 16px;
color: #333;
font-weight: 600;
text-align: center;
}
</style>

+ 4
- 4
mindinsight/ui/src/views/train-manage/summary-manage.vue View File

@@ -70,7 +70,7 @@ limitations under the License.
:label="$t('summaryManage.updateTime')"
show-overflow-tooltip>
</el-table-column>
<!--operate -->
<!-- operate -->
<el-table-column prop="operate"
:label="$t('summaryManage.operation')"
class-name="operate-container"
@@ -121,7 +121,7 @@ limitations under the License.
</div>

</div>
<!-- outer Page -->
<!-- outer Page -->
<div class="pagination-content">
<el-pagination @current-change="currentPageChange"
@size-change="currentPagesizeChange"
@@ -412,9 +412,9 @@ export default {
if (row.profiler_type === 'gpu') {
router = '/profiling-gpu';
} else if (row.profiler_type === 'cluster_ascend') {
router = '/profiling-cluster';
router = '/cluster-dashboard';
} else if (row.profiler_type === 'cluster_gpu') {
router = '/profiling-gpu-cluster';
router = '/cluster-dashboard';
}
this.$router.push({
path: router,


Loading…
Cancel
Save