Browse Source

UI add profiling gpu data process page

tags/v1.1.0
WeiFeng-mindinsight 5 years ago
parent
commit
89712d6168
7 changed files with 362 additions and 43 deletions
  1. +16
    -0
      mindinsight/ui/src/locales/zh-cn.json
  2. +4
    -0
      mindinsight/ui/src/router.js
  3. +316
    -5
      mindinsight/ui/src/views/profiling-gpu/profiling-dashboard.vue
  4. +2
    -2
      mindinsight/ui/src/views/profiling-gpu/profiling.vue
  5. +13
    -5
      mindinsight/ui/src/views/profiling/data-process.vue
  6. +10
    -29
      mindinsight/ui/src/views/profiling/profiling-dashboard.vue
  7. +1
    -2
      mindinsight/ui/src/views/train-manage/graph.vue

+ 16
- 0
mindinsight/ui/src/locales/zh-cn.json View File

@@ -436,6 +436,22 @@
"gpuunit": "us/次",
"chartTitle": "平均耗时排行"
},
"profilingGPU": {
"minddata_device_queue": {
"desc": "主队列为空比例{n1}/{n2},为满比例{n3}/{n4}。"
},
"minddata_get_next_queue": {
"desc": "数据队列为空比例{n1}/{n2}。"
},
"connectorQuene": "主队列",
"dataProcess": "该图展示了数据处理阶段的流程,数据通过数据处理阶段存入主队列,再通过数据传输阶段存入数据队列,最终由数据传输算子get_next发送给前向训练使用。",
"dataProcessInfo": "综合分析该阶段的流程,通过判断主队列和数据队列为空的情况就可以初步判断可能出现性能异常的阶段。",
"analysisOne": "1、如果迭代间隙较长,并且数据队列部分batch为空,那么可能由于数据处理和数据传输阶段导致的性能异常,参考2,反之则定位数据传输算子get_next内部问题;",
"analysisTwo": "2、如果通过1定位为数据处理、数据传输阶段异常,则查看主队列情况,如果大概率为空,则可能为数据处理阶段导致异常,如果大概率不为空,则可能数据传输阶段异常;",
"chipInfo": "数据队列为空比例:",
"hostIsEmpty": "主队列为空比例:",
"hostIsFull": "主队列为满比例:"
},
"components": {
"summaryTitle": "训练选择",
"tagSelectTitle": "标签选择",


+ 4
- 0
mindinsight/ui/src/router.js View File

@@ -111,6 +111,10 @@ export default new Router({
component: () =>
import('./views/profiling-gpu/profiling-dashboard.vue'),
},
{
path: 'data-process',
component: () => import('./views/profiling/data-process.vue'),
},
{
path: 'operator',
component: () => import('./views/profiling-gpu/operator.vue'),


+ 316
- 5
mindinsight/ui/src/views/profiling-gpu/profiling-dashboard.vue View File

@@ -36,13 +36,147 @@ limitations under the License.
<div class="minddata">
<div class="title-wrap">
<div class="title">{{ $t('profiling.mindData') }}</div>
<div class="view-detail">
<button @click="viewDetail('data-process')"
:disabled="processSummary.noData"
:class="{disabled:processSummary.noData}">
{{ $t('profiling.viewDetail') }}
<i class="el-icon-d-arrow-right"></i></button>
</div>
<div class="tip-icon">
<el-tooltip placement="bottom"
effect="light">
<div slot="content"
class="tooltip-container">
<div class="font-size-style">{{$t("profiling.features")}}</div>
<div>{{$t('profilingGPU.dataProcess')}}</div>
<div>{{$t('profilingGPU.dataProcessInfo')}}</div>
<div>{{$t('profilingGPU.analysisOne')}}</div>
<div>{{$t('profilingGPU.analysisTwo')}}</div>
<div v-show="deviceInfoShow || queueInfoShow">{{$t('profiling.higherAnalysis')}}</div>
<br />
<div v-show="deviceInfoShow || queueInfoShow"
class="font-size-style">{{$t('profiling.statistics')}}</div>
<div v-show="queueInfoShow">{{$t('profilingGPU.chipInfo')}}
<span>{{processSummary.get_next.empty}} / {{processSummary.get_next.total}}</span>
</div>
<div v-show="deviceInfoShow">
<div>{{$t('profilingGPU.hostIsEmpty')}}
<span>{{processSummary.device.empty}} / {{processSummary.device.total}}</span>
</div>
<div>{{$t('profilingGPU.hostIsFull')}}
<span>{{processSummary.device.full}} / {{processSummary.device.total}}</span>
</div>
</div>
</div>
<i class="el-icon-info"></i>
</el-tooltip>
</div>
</div>
<div class="image-noData">
<div class="pipeline-container"
v-show="!processSummary.noData">
<div class="cell-container data-process">
<div class="title">
{{$t('profiling.pipeline')}}
</div>
</div>

<div class="queue-container">
<div class="img">
<div class="edge">
<img src="@/assets/images/data-flow.png"
alt="" />
</div>
<div class="icon">
<img src="@/assets/images/queue.svg"
alt=""
clickKey="connector_queue" />
</div>
<div class="edge">
<img src="@/assets/images/data-flow.png"
alt="" />
</div>
</div>
<div class="title">{{$t('profilingGPU.connectorQuene')}}</div>
<div class="description">
<div class="line"></div>
<div class="item"
v-if="processSummary.device.empty || processSummary.device.empty === 0">
{{$t('profiling.queueTip2')}}
<span class="num">
{{processSummary.device.empty}} / {{processSummary.device.total}}
</span>
</div>
<div class="item"
v-if="processSummary.device.full || processSummary.device.full === 0">
{{$t('profiling.queueTip1')}}
<span class="num">
{{processSummary.device.full}} / {{processSummary.device.total}}
</span>
</div>
</div>
</div>

<div class="cell-container device_queue_op"
clickKey="device_queue_op">
<div class="title">
{{$t('profiling.deviceQueueOp')}}
</div>
</div>

<div class="queue-container"
v-if="processSummary.count === processSummary.maxCount">
<div class="img">
<div class="edge">
<img src="@/assets/images/data-flow.png"
alt="" />
</div>
<div class="icon">
<img src="@/assets/images/queue.svg"
clickKey="data_queue"
alt="" />
</div>
<div class="edge">
<img src="@/assets/images/data-flow.png"
alt="" />
</div>
</div>
<div class="title">{{$t('profiling.dataQueue')}}</div>
<div class="description">
<div class="line"></div>
<div class="item"
v-if="processSummary.get_next.empty || processSummary.get_next.empty === 0">
{{$t('profiling.queueTip2')}}
<span class="num">
{{processSummary.get_next.empty}} / {{processSummary.get_next.total}}
</span>
</div>
<div class="item"
v-if="processSummary.get_next.full || processSummary.get_next.full === 0">
{{$t('profiling.queueTip1')}}
<span class="num">
{{processSummary.get_next.full}} / {{processSummary.get_next.total}}
</span>
</div>
</div>
</div>

<div class="cell-container get-next"
clickKey="get_next"
v-if="processSummary.count === processSummary.maxCount">
<div class="title">
{{$t('profiling.getData')}}
</div>
</div>
</div>
<div class="image-noData"
v-if="processSummary.noData">
<div>
<img :src="require('@/assets/images/coming-soon.png')"
<img :src="require('@/assets/images/nodata.png')"
alt="" />
</div>
<p>{{$t("public.stayTuned")}}</p>
<p v-show="!processSummary.initOver">{{$t("public.dataLoading")}}</p>
<p v-show="processSummary.initOver">{{$t("public.noData")}}</p>
</div>
</div>
</div>
@@ -103,7 +237,8 @@ limitations under the License.
:disabled="timeLine.disable"
:class="{disabled:timeLine.disable}">{{ $t('profiling.downloadTimeline') }}
</button>
<div class="el-icon-loading loading-icon" v-show="timeLine.waiting"></div>
<div class="el-icon-loading loading-icon"
v-show="timeLine.waiting"></div>
</div>
<div class="tip-icon">
<el-tooltip placement="bottom"
@@ -150,7 +285,8 @@ limitations under the License.
<span>{{$t('profiling.streamNum')}}</span><span>{{timelineInfo.streamNum}}</span>
</div>
<div class="info-line">
<span>{{$t('profiling.opNum')}}</span><span>{{timelineInfo.opNum}}</span></div>
<span>{{$t('profiling.opNum')}}</span><span>{{timelineInfo.opNum}}</span>
</div>
<div class="info-line">
<span>{{$t('profiling.opTimes')}}</span><span>{{timelineInfo.opTimes}}{{$t('profiling.times')}}</span>
</div>
@@ -180,6 +316,25 @@ export default {
summaryPath: this.$route.query.dir, // Summary path data
relativePath: this.$route.query.path, // Relative path of summary log
currentCard: '', // Data of current card
queueInfoShow: false, // Whether to show queue information
deviceInfoShow: false, // Whether to show device information
processSummary: {
// State backing the data-process (MindData) summary card
noData: true, // Stays true until dealProcess() receives a non-empty summary object
count: 6, // Number of top-level keys in the latest summary response
maxCount: 6, // Key count at which the data queue and get_next stages are rendered
device: {
// Batch counters of the main queue, copied from device_queue_info.summary
empty: 0, // Value of empty_batch_count
full: 0, // Value of full_batch_count
total: 0, // Value of total_batch
},
get_next: {
// Batch counters of the data queue, copied from get_next_queue_info.summary
empty: 0,
full: 0,
total: 0,
},
initOver: false, // Set to true once the summary request has resolved
},
pieChart: {
// Pie graph information of operators
chartDom: null,
@@ -247,6 +402,65 @@ export default {
init() {
// Start loading each dashboard section. queryTimeline() and initPieChart()
// are defined elsewhere in this component; presumably they load the timeline
// and the operator pie chart (verify against their definitions).
this.queryTimeline();
this.initPieChart();
// Requests the data-process summary that feeds the MindData pipeline card.
this.getProccessSummary();
},
/**
* Get the data of process summary
*/
getProccessSummary() {
const params = {
train_id: this.trainingJobId,
profile: this.summaryPath,
device_id: this.currentCard,
};
RequestService.queryProcessSummary(params).then((resp) => {
this.processSummary.initOver = true;
if (resp && resp.data) {
const data = JSON.parse(JSON.stringify(resp.data));
this.processSummary.count = Object.keys(data).length;
this.dealProcess(data);
} else {
this.dealProcess(null);
this.processSummary.initOver = true;
}
});
},
/**
* Set the data of process
* @param {Object} data The data of process
*/
dealProcess(data) {
this.processSummary.device = {
empty: 0,
full: 0,
total: 0,
};
this.processSummary.get_next = {
empty: 0,
full: 0,
total: 0,
};
this.processSummary.noData = true;

if (data && Object.keys(data).length) {
if (data.device_queue_info && data.device_queue_info.summary) {
this.deviceInfoShow = true;
this.processSummary.device = {
empty: data.device_queue_info.summary.empty_batch_count,
full: data.device_queue_info.summary.full_batch_count,
total: data.device_queue_info.summary.total_batch,
};
}
if (data.get_next_queue_info && data.get_next_queue_info.summary) {
this.queueInfoShow = true;
this.processSummary.get_next = {
empty: data.get_next_queue_info.summary.empty_batch_count,
full: data.get_next_queue_info.summary.full_batch_count,
total: data.get_next_queue_info.summary.total_batch,
};
}
this.processSummary.noData = false;
}
},
/**
* Router link
@@ -576,6 +790,103 @@ export default {
}
/* MindData card: styles for the data-process pipeline diagram */
.minddata {
height: calc(55% - 15px);
/* Flex row holding the stage cells and the queues drawn between them */
.pipeline-container {
width: 100%;
padding: 20px 20px;
height: calc(100% - 52px);
display: flex;
/* Zero font size on the container; each text element below sets its own size */
font-size: 0;
align-items: baseline;
/* Coloured box for one pipeline stage */
.cell-container {
width: 20%;
min-width: 110px;
padding: 20px 0;
border: 2px solid transparent;
.title {
font-size: 14px;
line-height: 20px;
padding: 0 0 0 10px;
font-weight: bold;
}
}
/* Per-stage colours: background plus a matching left border on the title */
.data-process {
background-color: #e3f8eb;
.title {
border-left: 2px solid #00a5a7;
}
}
.device_queue_op {
background-color: #e1f2ff;
.title {
border-left: 2px solid #6cbfff;
}
}
.get-next {
background-color: #fef4dd;
.title {
border-left: 2px solid #fdca5a;
}
}
/* Queue drawn between two stages: flow edge, queue icon, flow edge */
.queue-container {
width: 20%;
position: relative;
.img {
width: 100%;
height: 24px;
margin-top: 30px;
.edge {
width: calc(50% - 40px);
display: inline-block;
vertical-align: middle;
img {
width: 100%;
}
}
.icon {
padding: 0 20px;
display: inline-block;
vertical-align: middle;
img {
padding: 3px;
border: 2px solid transparent;
}
}
}

.title {
text-align: center;
font-size: 14px;
margin-top: 10px;
font-weight: bold;
}
/* Empty/full statistics, absolutely positioned within the queue container */
.description {
position: absolute;
font-size: 12px;
line-height: 12px;
white-space: nowrap;
overflow: visible;
width: 100%;
text-align: center;
/* Thin vertical line rendered above the statistic items */
.line {
width: 1px;
height: 40px;
margin: 20px 0;
border-left: 1px solid #979797;
display: inline-block;
}
.item {
font-size: 12px;
line-height: 16px;
white-space: normal;
overflow: visible;
/* Keep the "x / y" ratio on a single line */
.num {
white-space: nowrap;
color: #07a695;
}
}
}
}
}
}
}
.pro-router-right {


+ 2
- 2
mindinsight/ui/src/views/profiling-gpu/profiling.vue View File

@@ -211,7 +211,7 @@ export default {
: '--';
const divDom = document.createElement('div');
divDom.setAttribute('class', 'content-style');
const content = `${this.$t(`profiling`)[item].desc}`
const content = `${this.$t(`profilingGPU`)[item].desc}`
.replace(
`{n1}`,
`<span class="nowrap-style"> ${deviceEmpty}</span>`,
@@ -242,7 +242,7 @@ export default {
: '--';
const divDom = document.createElement('div');
divDom.setAttribute('class', 'content-style');
const content = `${this.$t(`profiling`)[item].desc}`
const content = `${this.$t(`profilingGPU`)[item].desc}`
.replace(
`{n1}`,
`<span class="nowrap-style"> ${getNextEmpty}</span>`,


+ 13
- 5
mindinsight/ui/src/views/profiling/data-process.vue View File

@@ -48,7 +48,7 @@ limitations under the License.
alt="" />
</div>
</div>
<div class="title">{{$t('profiling.connectorQuene')}}</div>
<div class="title">{{connectorQuene}}</div>
<div class="description">
<div class="item"
v-if="processSummary.device.empty || processSummary.device.empty === 0">
@@ -132,7 +132,7 @@ limitations under the License.
<div class="chart-content">
<div class="chart-wrap"
:class="{highlight:selected==='connector_queue'}">
<div class="title">{{$t('profiling.connectorQuene')}}</div>
<div class="title">{{connectorQuene}}</div>
<template v-if="!connectQueueChart.noData">
<div class="data-tips">
<div v-if="connectQueueChart.queueSummary.empty_queue!==undefined">
@@ -208,7 +208,7 @@ limitations under the License.
<div class="chart-content">
<div class="chart-wrap"
:class="{highlight:selected==='connector_queue'}">
<div class="title">{{$t('profiling.connectorQuene')}}</div>
<div class="title">{{connectorQuene}}</div>
<template v-if="!connectQueueChart.noData">
<div class="data-tips">
<div v-if="connectQueueChart.queueSummary.empty_queue!==undefined">
@@ -268,12 +268,14 @@ limitations under the License.
<div class="item"><span>{{current_op.name}} type:</span>{{current_op.op_type}}</div>
<div class="item">
<span>{{current_op.name}} {{$t('profiling.workersNum')}}:</span>
{{current_op.num_workers}}</div>
{{current_op.num_workers}}
</div>
<div class="item"><span>{{parent_op.name}} ID:</span>{{parent_op.op_id}}</div>
<div class="item"><span>{{parent_op.name}} type:</span>{{parent_op.op_type}}</div>
<div class="item">
<span>{{parent_op.name}} {{$t('profiling.workersNum')}}:</span>
{{parent_op.num_workers}}</div>
{{parent_op.num_workers}}
</div>
</div>
</div>
</div>
@@ -392,6 +394,7 @@ export default {
initQueue: '',
trainId: '',
selected: '',
connectorQuene: '',
};
},
watch: {
@@ -457,6 +460,11 @@ export default {
});
},
init() {
this.connectorQuene = this.$t(
`profiling${
location.href.includes('#/profiling-gpu/') ? 'GPU' : ''
}.connectorQuene`,
);
this.queryProcessSummary();
},
/**


+ 10
- 29
mindinsight/ui/src/views/profiling/profiling-dashboard.vue View File

@@ -130,14 +130,14 @@ limitations under the License.
<div v-show="deviceInfoShow || queueInfoShow"
class="font-size-style">{{$t('profiling.statistics')}}</div>
<div v-show="queueInfoShow">{{$t('profiling.chipInfo')}}
<span>{{queueInfoEmptyNum}} / {{queueInfoTotalNum}}</span>
<span>{{processSummary.get_next.empty}} / {{processSummary.get_next.total}}</span>
</div>
<div v-show="deviceInfoShow">
<div>{{$t('profiling.hostIsEmpty')}}
<span>{{deviceInfoEmptyNum}} / {{deviceInfoTotalNum}}</span>
<span>{{processSummary.device.empty}} / {{processSummary.device.total}}</span>
</div>
<div>{{$t('profiling.hostIsFull')}}
<span>{{deviceInfoFullNum}} / {{deviceInfoTotalNum}}</span>
<span>{{processSummary.device.full}} / {{processSummary.device.total}}</span>
</div>
</div>
</div>
@@ -309,7 +309,8 @@ limitations under the License.
:disabled="timeLine.disable"
:class="{disabled:timeLine.disable}">{{ $t('profiling.downloadTimeline') }}
</button>
<div class="el-icon-loading loading-icon" v-show="timeLine.waiting"></div>
<div class="el-icon-loading loading-icon"
v-show="timeLine.waiting"></div>
</div>
<div class="tip-icon">
<el-tooltip placement="bottom"
@@ -357,7 +358,8 @@ limitations under the License.
<span>{{$t('profiling.streamNum')}}</span><span>{{timelineInfo.streamNum}}</span>
</div>
<div class="info-line">
<span>{{$t('profiling.opNum')}}</span><span>{{timelineInfo.opNum}}</span></div>
<span>{{$t('profiling.opNum')}}</span><span>{{timelineInfo.opNum}}</span>
</div>
<div class="info-line">
<span>{{$t('profiling.opTimes')}}</span><span>{{timelineInfo.opTimes + $t('profiling.times')}}</span>
</div>
@@ -390,11 +392,6 @@ export default {
tailPercent: '--', // Ratio of time consumed by step tail
queueInfoShow: false, // Whether to show queue information
deviceInfoShow: false, // Whether to show device information
queueInfoEmptyNum: '--', // The number of empty queue information
queueInfoTotalNum: '--', // Total number of queues
deviceInfoEmptyNum: '--', // Number of empty device information
deviceInfoTotalNum: '--', // Total number of devices
deviceInfoFullNum: '--', // Number of full queues
svg: {
// Step trace svg information
data: [], // Data of svg
@@ -546,24 +543,6 @@ export default {
const data = JSON.parse(JSON.stringify(resp.data));
this.processSummary.count = Object.keys(data).length;
this.dealProcess(data);
// Chip side
if (resp.data.get_next_queue_info) {
this.queueInfoShow = true;
this.queueInfoEmptyNum =
resp.data.get_next_queue_info.summary.empty_batch_count;
this.queueInfoTotalNum =
resp.data.get_next_queue_info.summary.total_batch;
}
// Host side
if (resp.data.device_queue_info) {
this.deviceInfoShow = true;
this.deviceInfoEmptyNum =
resp.data.device_queue_info.summary.empty_batch_count;
this.deviceInfoTotalNum =
resp.data.device_queue_info.summary.total_batch;
this.deviceInfoFullNum =
resp.data.device_queue_info.summary.full_batch_count;
}
} else {
this.dealProcess(null);
this.processSummary.initOver = true;
@@ -1176,6 +1155,7 @@ export default {

if (data && Object.keys(data).length) {
if (data.device_queue_info && data.device_queue_info.summary) {
this.deviceInfoShow = true;
this.processSummary.device = {
empty: data.device_queue_info.summary.empty_batch_count,
full: data.device_queue_info.summary.full_batch_count,
@@ -1183,6 +1163,7 @@ export default {
};
}
if (data.get_next_queue_info && data.get_next_queue_info.summary) {
this.queueInfoShow = true;
this.processSummary.get_next = {
empty: data.get_next_queue_info.summary.empty_batch_count,
full: data.get_next_queue_info.summary.full_batch_count,
@@ -1360,7 +1341,7 @@ export default {
align-items: baseline;
.cell-container {
width: 20%;
min-width:110px;
min-width: 110px;
padding: 20px 0;
border: 2px solid transparent;
.title {


+ 1
- 2
mindinsight/ui/src/views/train-manage/graph.vue View File

@@ -1305,6 +1305,7 @@ export default {
(response) => {
if (response && response.data) {
this.treeFlag = false;
this.treeWrapFlag = true;
this.searchNode.childNodes = [];
const data = response.data.nodes.map((val) => {
return {
@@ -1824,8 +1825,6 @@ export default {
margin-right: 10px;
}
.el-tree {
overflow-x: auto;
overflow-y: hidden;
& > .el-tree-node {
min-width: 100%;
display: inline-block;


Loading…
Cancel
Save