Browse Source

Fix issue: the UI shows an error tip and returns to the homepage when the training job does not exist

tags/v1.1.0
ph 5 years ago
parent
commit
c5eee763b2
9 changed files with 39 additions and 45 deletions
  1. +6
    -11
      mindinsight/ui/src/locales/en-us.json
  2. +6
    -11
      mindinsight/ui/src/locales/zh-cn.json
  3. +2
    -2
      mindinsight/ui/src/services/fetcher.js
  4. +11
    -4
      mindinsight/ui/src/views/profiling-gpu/profiling-dashboard.vue
  5. +1
    -1
      mindinsight/ui/src/views/profiling-gpu/profiling.vue
  6. +1
    -8
      mindinsight/ui/src/views/profiling/data-process.vue
  7. +1
    -0
      mindinsight/ui/src/views/profiling/profiling-dashboard.vue
  8. +1
    -1
      mindinsight/ui/src/views/profiling/profiling.vue
  9. +10
    -7
      mindinsight/ui/src/views/profiling/step-trace.vue

+ 6
- 11
mindinsight/ui/src/locales/en-us.json View File

@@ -138,7 +138,7 @@
"trainingScalar": "Training Scalar Information",
"samplingData": "Data Sampling",
"imagesampleSwitch": "Switch Tag",
"invalidId": "Invalid training job.",
"invalidId": "Invalid job.",
"summaryDirPath": "Summary path:",
"loadingTip": "Loading...",
"waitLoading": "waiting to be loaded",
@@ -439,20 +439,14 @@
"chartTitle": "Average Time Consumption Ranking"
},
"profilingGPU": {
"minddata_device_queue": {
"desc": "The ratio of empty primary queues is {n1}/{n2}, and the ratio of full queues is {n3}/{n4}."
},
"minddata_get_next_queue": {
"desc": "The ratio of empty data queues is {n1}/{n2}."
},
"connectorQuene": "Primary Queues",
"dataProcess": "This shows the data processing. Data is stored in the primary queue during data processing, and then stored in the data queue during data transmission. Finally, the forward and backward propagation get_next transmits the data to forward propagation.",
"dataProcessInfo": "By determining the empty primary and data queues, you can preliminarily determine the stage where the performance is abnormal.",
"dataProcess": "This shows the data processing. Data is stored in the host queue during data processing, and then stored in the data queue during data transmission. Finally, the forward and backward propagation get_next transmits the data to forward propagation.",
"dataProcessInfo": "By determining the empty host and data queues, you can preliminarily determine the stage where the performance is abnormal.",
"analysisOne": "1. If the step interval is long and some batches of the data queue are empty, the performance is abnormal during data processing and transmission. Otherwise, locate the internal problem of the forward and backward propagation get_next.",
"analysisTwo": "2. If the performance is abnormal during data processing and transmission, check the primary queue. If the primary queue is empty at a high probability, the exception may occur during data transmission.",
"chipInfo": "Ratio of empty data queues:",
"hostIsEmpty": "Ratio of empty primary queues:",
"hostIsFull": "Ratio of full primary queues:"
"analysisTwo": "2. If the performance is abnormal during data processing and transmission, check the host queue. If the host queue is empty at a high probability, the exception may occur during data processing. Otherwise, the exception may occur during data transmission.",
"chipInfo": "Ratio of empty data queues:"
},
"components": {
"summaryTitle": "Training Selection",
@@ -593,6 +587,7 @@
"50545012": "The tensor data does not exist. Please refresh.",
"50545013": "The requested data is too large. Try another dimension.",
"50545014": "The queried tensor data has been replaced by new data. Please refresh.",
"50546083": "The profiler directory does not exist.",
"50548001": "Ascend AI Processor information query timed out.",
"5054B080": "Incorrect parameter type. Please check the input parameter type.",
"5054B081": "Incorrect parameter value. Please check the input parameter.",


+ 6
- 11
mindinsight/ui/src/locales/zh-cn.json View File

@@ -138,7 +138,7 @@
"trainingScalar": "训练标量信息",
"samplingData": "数据抽样",
"imagesampleSwitch": "切换标签",
"invalidId": "无效的训练作业",
"invalidId": "无效的作业",
"summaryDirPath": "训练日志路径:",
"loadingTip": "加载中",
"waitLoading": "待加载",
@@ -438,20 +438,14 @@
"chartTitle": "平均耗时排名"
},
"profilingGPU": {
"minddata_device_queue": {
"desc": "主队列为空比例{n1}/{n2},为满比例{n3}/{n4}。"
},
"minddata_get_next_queue": {
"desc": "数据队列为空比例{n1}/{n2}。"
},
"connectorQuene": "主队列",
"dataProcess": "该图展示了数据处理阶段的流程,数据通过数据处理阶段存入主队列,再通过数据传输阶段存入数据队列,最终由数据传输算子get_next发送给前向训练使用。",
"dataProcessInfo": "综合分析该阶段的流程,通过判断主队列和数据队列为空的情况就可以初步判断可能出现性能异常的阶段。",
"dataProcess": "该图展示了数据处理阶段的流程,数据通过数据处理阶段存入主机队列,再通过数据传输阶段存入数据队列,最终由数据传输算子get_next发送给前向训练使用。",
"dataProcessInfo": "综合分析该阶段的流程,通过判断主机队列和数据队列为空的情况就可以初步判断可能出现性能异常的阶段。",
"analysisOne": "1、如果迭代间隙较长,并且数据队列部分batch为空,那么可能由于数据处理和数据传输阶段导致的性能异常,参考2,反之则定位数据传输算子get_next内部问题;",
"analysisTwo": "2、如果通过1定位为数据处理、数据传输阶段异常,则查看主队列情况,如果大概率为空,则可能为数据处理阶段导致异常,如果大概率不为空,则可能数据传输阶段异常;",
"chipInfo": "数据队列为空比例:",
"hostIsEmpty": "主队列为空比例:",
"hostIsFull": "主队列为满比例:"
"analysisTwo": "2、如果通过1定位为数据处理、数据传输阶段异常,则查看主机队列情况,如果大概率为空,则可能为数据处理阶段导致异常,如果大概率不为空,则可能数据传输阶段异常;",
"chipInfo": "数据队列为空比例:"
},
"components": {
"summaryTitle": "训练选择",
@@ -592,6 +586,7 @@
"50545012": "张量数据不存在,请刷新。",
"50545013": "请求的数据过大,请使用其他维度重试。",
"50545014": "查询的张量数据已被新数据替换,请刷新。",
"50546083": "性能数据目录不存在",
"50548001": "昇腾AI处理器信息查询超时",
"5054B080": "参数类型错误,请检查输入参数类型",
"5054B081": "参数值错误,请检查输入参数",


+ 2
- 2
mindinsight/ui/src/services/fetcher.js View File

@@ -74,7 +74,7 @@ axios.interceptors.response.use(
const errorCode = error.response.data.error_code.toString();

const ignoreCode = {
ignoreError: ['50545005'],
ignoreError: ['50545005', '50546083'],
regardError: ['50545013', '50545014', '5054500D'],
};

@@ -84,7 +84,7 @@ axios.interceptors.response.use(
}
setTimeout(()=>{
router.push('/');
}, 3000);
}, 2500);
return Promise.reject(error);
}
if (


+ 11
- 4
mindinsight/ui/src/views/profiling-gpu/profiling-dashboard.vue View File

@@ -61,10 +61,10 @@ limitations under the License.
<span>{{processSummary.get_next.empty}} / {{processSummary.get_next.total}}</span>
</div>
<div v-show="deviceInfoShow">
<div>{{$t('profilingGPU.hostIsEmpty')}}
<div>{{$t('profiling.hostIsEmpty')}}
<span>{{processSummary.device.empty}} / {{processSummary.device.total}}</span>
</div>
<div>{{$t('profilingGPU.hostIsFull')}}
<div>{{$t('profiling.hostIsFull')}}
<span>{{processSummary.device.full}} / {{processSummary.device.total}}</span>
</div>
</div>
@@ -97,7 +97,7 @@ limitations under the License.
alt="" />
</div>
</div>
<div class="title">{{$t('profilingGPU.connectorQuene')}}</div>
<div class="title">{{$t('profiling.connectorQuene')}}</div>
<div class="description">
<div class="line"></div>
<div class="item"
@@ -369,10 +369,15 @@ export default {
if (newValue.curCardNum === '') {
this.pieChart.noData = true;
this.pieChart.initOver = true;
this.processSummary.initOver = true;
this.timelineInfo.initOver = true;
this.timeLine.waiting = false;
}
if (newValue.query.dir && newValue.query.id && newValue.query.path) {
if (newValue.query.dir &&
newValue.query.id &&
newValue.query.path &&
newValue.curCardNum
) {
this.summaryPath = newValue.query.dir;
this.trainingJobId = newValue.query.id;
this.relativePath = newValue.query.path;
@@ -387,6 +392,7 @@ export default {
)}-MindInsight`;
}
this.pieChart.initOver = false;
this.processSummary.initOver = false;
this.timelineInfo.initOver = false;
this.init();
}
@@ -583,6 +589,7 @@ export default {
}
})
.catch(() => {
this.pieChart.data = [];
this.pieChart.noData = true;
this.pieChart.initOver = true;
});


+ 1
- 1
mindinsight/ui/src/views/profiling-gpu/profiling.vue View File

@@ -80,7 +80,7 @@ export default {
collapse: false,
curDashboardInfo: {
// Current Select card info
curCardNum: '',
curCardNum: null,
query: {},
},
};


+ 1
- 8
mindinsight/ui/src/views/profiling/data-process.vue View File

@@ -460,11 +460,7 @@ export default {
});
},
init() {
this.connectorQuene = this.$t(
`profiling${
location.href.includes('#/profiling-gpu/') ? 'GPU' : ''
}.connectorQuene`,
);
this.connectorQuene = this.$t('profiling.connectorQuene');
this.queryProcessSummary();
},
/**
@@ -588,9 +584,6 @@ export default {
setOption(chart) {
const myChart = echarts.init(document.getElementById(chart.id));
const option = {
title: {
text: '',
},
tooltip: {
trigger: 'axis',
},


+ 1
- 0
mindinsight/ui/src/views/profiling/profiling-dashboard.vue View File

@@ -670,6 +670,7 @@ export default {
}
})
.catch(() => {
this.pieChart.data = [];
this.pieChart.noData = true;
this.pieChart.initOver = true;
});


+ 1
- 1
mindinsight/ui/src/views/profiling/profiling.vue View File

@@ -80,7 +80,7 @@ export default {
collapse: false,
curDashboardInfo: {
// Current Select card info
curCardNum: '',
curCardNum: null,
query: {},
initOver: false,
},


+ 10
- 7
mindinsight/ui/src/views/profiling/step-trace.vue View File

@@ -290,10 +290,7 @@ export default {
label: this.$t('profiling.stepInputTip'),
};

this.getTimeInfo('fp-bp', 'fp_and_bp');
this.getTimeInfo('iter-gap', 'iteration_interval');
this.getTimeInfo('tailing', 'tail');
this.queryTrainingTrace(0);
this.queryTrainingTrace(0, true);
},
/**
* Change the current step value
@@ -303,13 +300,13 @@ export default {
if (value === 0 || (!this.steps.step && this.steps.step !== 0)) {
this.steps.step = null;
this.steps.trueStep = null;
this.queryTrainingTrace(0);
this.queryTrainingTrace(0, false);
} else if (
/^[0-9]*[1-9][0-9]*$/.test(this.steps.step) &&
this.steps.step <= this.steps.max
) {
this.steps.trueStep = this.steps.step;
this.queryTrainingTrace(this.steps.step);
this.queryTrainingTrace(this.steps.step, false);
} else {
this.steps.step = this.steps.trueStep;
this.$message.error(
@@ -471,8 +468,9 @@ export default {
/**
* Get training trace information
* @param {Number} step Current step value
* @param {Boolean} init Init flag
*/
queryTrainingTrace(step) {
queryTrainingTrace(step, init) {
const params = {
dir: this.relativePath,
type: step,
@@ -506,6 +504,11 @@ export default {
JSON.parse(JSON.stringify(res.data.training_trace_graph)),
);
});
if (init) {
this.getTimeInfo('fp-bp', 'fp_and_bp');
this.getTimeInfo('iter-gap', 'iteration_interval');
this.getTimeInfo('tailing', 'tail');
}
} else {
this.fp_start = '--';
this.bp_end = '--';


Loading…
Cancel
Save