diff --git a/mindinsight/ui/src/locales/zh-cn.json b/mindinsight/ui/src/locales/zh-cn.json index 0f7410fa..feba81ac 100644 --- a/mindinsight/ui/src/locales/zh-cn.json +++ b/mindinsight/ui/src/locales/zh-cn.json @@ -436,6 +436,22 @@ "gpuunit": "us/次", "chartTitle": "平均耗时排行" }, + "profilingGPU": { + "minddata_device_queue": { + "desc": "主队列为空比例{n1}/{n2},为满比例{n3}/{n4}。" + }, + "minddata_get_next_queue": { + "desc": "数据队列为空比例{n1}/{n2}。" + }, + "connectorQuene": "主队列", + "dataProcess": "该图展示了数据处理阶段的流程,数据通过数据处理阶段存入主队列,再通过数据传输阶段存入数据队列,最终由数据传输算子get_next发送给前向训练使用。", + "dataProcessInfo": "综合分析该阶段的流程,通过判断主队列和数据队列为空的情况就可以初步判断可能出现性能异常的阶段。", + "analysisOne": "1、如果迭代间隙较长,并且数据队列部分batch为空,那么可能由于数据处理和数据传输阶段导致的性能异常,参考2,反之则定位数据传输算子get_next内部问题;", + "analysisTwo": "2、如果通过1定位为数据处理、数据传输阶段异常,则查看主队列情况,如果大概率为空,则可能为数据处理阶段导致异常,如果大概率不为空,则可能数据传输阶段异常;", + "chipInfo": "数据队列为空比例:", + "hostIsEmpty": "主队列为空比例:", + "hostIsFull": "主队列为满比例:" + }, "components": { "summaryTitle": "训练选择", "tagSelectTitle": "标签选择", diff --git a/mindinsight/ui/src/router.js b/mindinsight/ui/src/router.js index be3e78d9..57a8655a 100644 --- a/mindinsight/ui/src/router.js +++ b/mindinsight/ui/src/router.js @@ -111,6 +111,10 @@ export default new Router({ component: () => import('./views/profiling-gpu/profiling-dashboard.vue'), }, + { + path: 'data-process', + component: () => import('./views/profiling/data-process.vue'), + }, { path: 'operator', component: () => import('./views/profiling-gpu/operator.vue'), diff --git a/mindinsight/ui/src/views/profiling-gpu/profiling-dashboard.vue b/mindinsight/ui/src/views/profiling-gpu/profiling-dashboard.vue index 5a8ea3c1..7081dd58 100644 --- a/mindinsight/ui/src/views/profiling-gpu/profiling-dashboard.vue +++ b/mindinsight/ui/src/views/profiling-gpu/profiling-dashboard.vue @@ -36,13 +36,147 @@ limitations under the License.
{{ $t('profiling.mindData') }}
+
+ +
+
+ +
+
{{$t("profiling.features")}}
+
{{$t('profilingGPU.dataProcess')}}
+
{{$t('profilingGPU.dataProcessInfo')}}
+
{{$t('profilingGPU.analysisOne')}}
+
{{$t('profilingGPU.analysisTwo')}}
+
{{$t('profiling.higherAnalysis')}}
+
+
{{$t('profiling.statistics')}}
+
{{$t('profilingGPU.chipInfo')}} + {{processSummary.get_next.empty}} / {{processSummary.get_next.total}} +
+
+
{{$t('profilingGPU.hostIsEmpty')}} + {{processSummary.device.empty}} / {{processSummary.device.total}} +
+
{{$t('profilingGPU.hostIsFull')}} + {{processSummary.device.full}} / {{processSummary.device.total}} +
+
+
+ +
+
-
+
+
+
+ {{$t('profiling.pipeline')}} +
+
+ +
+
+
+ +
+
+ +
+
+ +
+
+
{{$t('profilingGPU.connectorQuene')}}
+
+
+
+ {{$t('profiling.queueTip2')}} + + {{processSummary.device.empty}} / {{processSummary.device.total}} + +
+
+ {{$t('profiling.queueTip1')}} + + {{processSummary.device.full}} / {{processSummary.device.total}} + +
+
+
+ +
+
+ {{$t('profiling.deviceQueueOp')}} +
+
+ +
+
+
+ +
+
+ +
+
+ +
+
+
{{$t('profiling.dataQueue')}}
+
+
+
+ {{$t('profiling.queueTip2')}} + + {{processSummary.get_next.empty}} / {{processSummary.get_next.total}} + +
+
+ {{$t('profiling.queueTip1')}} + + {{processSummary.get_next.full}} / {{processSummary.get_next.total}} + +
+
+
+ +
+
+ {{$t('profiling.getData')}} +
+
+
+
-
-

{{$t("public.stayTuned")}}

+

{{$t("public.dataLoading")}}

+

{{$t("public.noData")}}

@@ -103,7 +237,8 @@ limitations under the License. :disabled="timeLine.disable" :class="{disabled:timeLine.disable}">{{ $t('profiling.downloadTimeline') }} -
+
{{$t('profiling.streamNum')}}{{timelineInfo.streamNum}}
- {{$t('profiling.opNum')}}{{timelineInfo.opNum}}
+ {{$t('profiling.opNum')}}{{timelineInfo.opNum}} +
{{$t('profiling.opTimes')}}{{timelineInfo.opTimes}}{{$t('profiling.times')}}
@@ -180,6 +316,25 @@ export default {
       summaryPath: this.$route.query.dir, // Summary path data
       relativePath: this.$route.query.path, // Relative path of summary log
       currentCard: '', // Data of current card
+      queueInfoShow: false, // True once a get_next queue summary is received
+      deviceInfoShow: false, // True once a device queue summary is received
+      processSummary: {
+        // Data of process summary
+        noData: true, // True until a non-empty response has been handled
+        count: 6, // Overwritten with the number of keys in the response data
+        maxCount: 6,
+        device: {
+          empty: 0, // Empty batch count of the device queue
+          full: 0, // Full batch count of the device queue
+          total: 0, // Total batch count of the device queue
+        },
+        get_next: {
+          empty: 0, // Empty batch count of the get_next queue
+          full: 0, // Full batch count of the get_next queue
+          total: 0, // Total batch count of the get_next queue
+        },
+        initOver: false, // Set to true once the summary request has settled
+      },
       pieChart: {
         // Pie graph information of operators
         chartDom: null,
@@ -247,6 +402,68 @@ export default {
     init() {
       this.queryTimeline();
       this.initPieChart();
+      this.getProccessSummary();
+    },
+    /**
+     * Query the process summary of the current card and hand it to
+     * dealProcess. An empty response or a failed request resets the queue
+     * counters via dealProcess(null); initOver is set to true in every case.
+     */
+    getProccessSummary() {
+      const params = {
+        train_id: this.trainingJobId,
+        profile: this.summaryPath,
+        device_id: this.currentCard,
+      };
+      RequestService.queryProcessSummary(params)
+          .then((resp) => {
+            this.processSummary.initOver = true;
+            if (resp && resp.data) {
+              this.processSummary.count = Object.keys(resp.data).length;
+              this.dealProcess(resp.data);
+            } else {
+              this.dealProcess(null);
+            }
+          })
+          .catch(() => {
+            // A rejected request must still end initialization; otherwise
+            // initOver stays false and the rejection goes unhandled.
+            this.dealProcess(null);
+            this.processSummary.initOver = true;
+          });
+    },
+    /**
+     * Reset the queue counters, then fill them from the summaries in data
+     * @param {Object} data Response data of the process summary; null or an
+     * empty object leaves the counters at zero and noData set to true
+     */
+    dealProcess(data) {
+      const zeroCounts = () => ({empty: 0, full: 0, total: 0});
+      // Map a queue summary from the response to the counters shown in the view
+      const toCounts = (summary) => ({
+        empty: summary.empty_batch_count,
+        full: summary.full_batch_count,
+        total: summary.total_batch,
+      });
+      this.processSummary.device = zeroCounts();
+      this.processSummary.get_next = zeroCounts();
+      this.processSummary.noData = true;
+
+      if (!data || !Object.keys(data).length) {
+        return;
+      }
+      // NOTE(review): the *InfoShow flags are only ever set to true here;
+      // confirm whether they should be cleared when a summary is missing.
+      if (data.device_queue_info && data.device_queue_info.summary) {
+        this.deviceInfoShow = true;
+        this.processSummary.device = toCounts(data.device_queue_info.summary);
+      }
+      if (data.get_next_queue_info && data.get_next_queue_info.summary) {
+        this.queueInfoShow = true;
+        this.processSummary.get_next =
+            toCounts(data.get_next_queue_info.summary);
+      }
+      this.processSummary.noData = false;
     },
     /**
      * Router link
@@ -576,6 +793,103 @@ export default {
     }
     .minddata {
       height: calc(55% - 15px);
+      .pipeline-container {
+        width: 100%;
+        padding: 20px 20px;
+        height: calc(100% - 52px);
+        display: flex;
+        font-size: 0;
+        align-items: baseline;
+        .cell-container {
+          width: 20%;
+          min-width: 110px;
+          padding: 20px 0;
+          border: 2px solid transparent;
+          .title {
+            font-size: 14px;
+            line-height: 20px;
+            padding: 0 0 0 10px;
+            font-weight: bold;
+          }
+        }
+        .data-process {
+          background-color: #e3f8eb;
+          .title {
+            border-left: 2px solid #00a5a7;
+          }
+        }
+        .device_queue_op {
+          background-color: #e1f2ff;
+          .title {
+            border-left: 2px solid #6cbfff;
+          }
+        }
+        .get-next {
+          background-color: #fef4dd;
+          .title {
+            border-left: 2px solid #fdca5a;
+          }
+        }
+        .queue-container {
+          width: 20%;
+          position: relative;
+          .img {
+            width: 100%;
+            height: 24px;
+            margin-top: 30px;
+            .edge {
+              width: calc(50% - 40px);
+              display: inline-block;
+              vertical-align: middle;
+              img {
+                width: 100%;
+              }
+            }
+            .icon {
+              padding: 0 20px;
+              display: inline-block;
+              vertical-align: middle;
+              img {
+                padding: 3px;
+                border: 2px solid transparent;
+              }
+            }
+          }
+
+          .title {
+            text-align: center;
+            font-size: 14px;
+            margin-top: 10px;
+            font-weight: bold;
+          }
+          .description {
+            position: absolute;
+            font-size: 12px;
+            line-height: 12px;
+            white-space: nowrap;
+            overflow: visible;
+            width: 100%;
+            text-align: center;
+            .line {
+              width: 1px;
+              height: 40px;
+              margin: 20px 0;
+              border-left: 1px solid #979797;
+              display: inline-block;
+            }
+            .item {
+              font-size: 12px;
+              line-height: 16px;
+              white-space:
normal; + overflow: visible; + .num { + white-space: nowrap; + color: #07a695; + } + } + } + } + } } } .pro-router-right { diff --git a/mindinsight/ui/src/views/profiling-gpu/profiling.vue b/mindinsight/ui/src/views/profiling-gpu/profiling.vue index 89fa1d5d..0d166edd 100644 --- a/mindinsight/ui/src/views/profiling-gpu/profiling.vue +++ b/mindinsight/ui/src/views/profiling-gpu/profiling.vue @@ -211,7 +211,7 @@ export default { : '--'; const divDom = document.createElement('div'); divDom.setAttribute('class', 'content-style'); - const content = `${this.$t(`profiling`)[item].desc}` + const content = `${this.$t(`profilingGPU`)[item].desc}` .replace( `{n1}`, ` ${deviceEmpty}`, @@ -242,7 +242,7 @@ export default { : '--'; const divDom = document.createElement('div'); divDom.setAttribute('class', 'content-style'); - const content = `${this.$t(`profiling`)[item].desc}` + const content = `${this.$t(`profilingGPU`)[item].desc}` .replace( `{n1}`, ` ${getNextEmpty}`, diff --git a/mindinsight/ui/src/views/profiling/data-process.vue b/mindinsight/ui/src/views/profiling/data-process.vue index 33378efa..19d3326b 100644 --- a/mindinsight/ui/src/views/profiling/data-process.vue +++ b/mindinsight/ui/src/views/profiling/data-process.vue @@ -48,7 +48,7 @@ limitations under the License. alt="" /> -
{{$t('profiling.connectorQuene')}}
+
{{connectorQuene}}
@@ -132,7 +132,7 @@ limitations under the License.
-
{{$t('profiling.connectorQuene')}}
+
{{connectorQuene}}