Reviewed-on: https://git.openi.org.cn/OpenI/aiforge/pulls/3195
Reviewed-by: liuzx <liuzx@pcl.ac.cn>
tags/v1.22.11.2^2
@@ -245,6 +245,32 @@ func GetTrainJobLog(jobID string) (string, error) { | |||
return logContent, nil | |||
} | |||
func GetGrampusMetrics(jobID string) (models.GetTrainJobMetricStatisticResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.GetTrainJobMetricStatisticResult | |||
res, err := client.R(). | |||
SetAuthToken(TOKEN). | |||
Get(HOST + urlTrainJob + "/" + jobID + "/task/0/replica/0/metrics") | |||
if err != nil { | |||
return result, fmt.Errorf("resty GetTrainJobLog: %v", err) | |||
} | |||
if err = json.Unmarshal([]byte(res.String()), &result); err != nil { | |||
log.Error("GetGrampusMetrics json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
return result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
} | |||
if res.StatusCode() != http.StatusOK { | |||
log.Error("Call GrampusMetrics failed(%d):%s(%s)", res.StatusCode(), result.ErrorCode, result.ErrorMsg) | |||
return result, fmt.Errorf("Call GrampusMetrics failed(%d):%d(%s)", res.StatusCode(), result.ErrorCode, result.ErrorMsg) | |||
} | |||
if !result.IsSuccess { | |||
log.Error("GetGrampusMetrics(%s) failed", jobID) | |||
return result, fmt.Errorf("GetGrampusMetrics failed:%s", result.ErrorMsg) | |||
} | |||
return result, nil | |||
} | |||
func StopJob(jobID string) (*models.GrampusStopJobResponse, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
@@ -1048,6 +1048,7 @@ func RegisterRoutes(m *macaron.Macaron) { | |||
m.Get("", repo.GetModelArtsTrainJobVersion) | |||
m.Post("/stop_version", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo_ext.GrampusStopJob) | |||
m.Get("/log", repo_ext.GrampusGetLog) | |||
m.Get("/metrics", repo_ext.GrampusMetrics) | |||
m.Get("/download_log", cloudbrain.AdminOrJobCreaterRightForTrain, repo_ext.GrampusDownloadLog) | |||
}) | |||
}) | |||
@@ -957,6 +957,28 @@ func GrampusGetLog(ctx *context.Context) { | |||
return | |||
} | |||
func GrampusMetrics(ctx *context.Context) { | |||
jobID := ctx.Params(":jobid") | |||
job, err := models.GetCloudbrainByJobID(jobID) | |||
if err != nil { | |||
log.Error("GetCloudbrainByJobID failed: %v", err, ctx.Data["MsgID"]) | |||
ctx.ServerError(err.Error(), err) | |||
return | |||
} | |||
result, err := grampus.GetGrampusMetrics(job.JobID) | |||
if err != nil { | |||
log.Error("GetTrainJobLog failed: %v", err, ctx.Data["MsgID"]) | |||
} | |||
ctx.JSON(http.StatusOK, map[string]interface{}{ | |||
"JobID": jobID, | |||
"Interval": result.Interval, | |||
"MetricsInfo": result.MetricsInfo, | |||
}) | |||
return | |||
} | |||
func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bootFile, paramSrc, outputRemotePath, datasetName, pretrainModelPath, pretrainModelFileName, modelRemoteObsUrl string) (string, error) { | |||
var command string | |||
@@ -284,10 +284,7 @@ | |||
<div class="content-pad"> | |||
<div class="ui pointing secondary menu" style="border-bottom: 1px solid rgba(34,36,38,.15);"> | |||
<a class="active item" | |||
data-tab="first{{$k}}">{{$.i18n.Tr "repo.modelarts.train_job.config"}}</a> | |||
<a class="item" data-tab="second{{$k}}" | |||
onclick="javascript:parseInfo()">{{$.i18n.Tr "repo.cloudbrain.runinfo"}}</a> | |||
data-tab="first{{$k}}">{{$.i18n.Tr "repo.modelarts.train_job.config"}}</a> | |||
<a class="item log_bottom" data-tab="third{{$k}}" | |||
data-version="{{.VersionName}}">{{$.i18n.Tr "repo.modelarts.log"}}</a> | |||
@@ -504,25 +501,6 @@ | |||
</div> | |||
</div> | |||
<div class="ui tab" data-tab="second{{$k}}"> | |||
<div> | |||
<div class="ui message message{{.VersionName}}" style="display: none;"> | |||
<div id="header"></div> | |||
</div> | |||
<div class="ui attached log" id="log_state{{.VersionName}}" | |||
style="height: 390px !important; overflow: auto;"> | |||
<input type="hidden" id="json_value" value="{{$.result.JobStatus.AppExitDiagnostics}}"> | |||
<input type="hidden" id="ExitDiagnostics" value="{{$.ExitDiagnostics}}"> | |||
<span id="info_display" class="info_text"> | |||
</span> | |||
</div> | |||
</div> | |||
</div> | |||
<div class="ui tab" data-tab="third{{$k}}"> | |||
<div class="file-info"> | |||
<a id="{{.VersionName}}-log-down" | |||
@@ -922,66 +900,7 @@ | |||
$('.secondary.menu .item').tab(); | |||
}); | |||
let userName | |||
let repoPath | |||
let jobID | |||
let downlaodFlag = {{ $.canDownload }} | |||
let taskID = {{ $.task.ID }} | |||
let realJobName = {{ $.task.JobName }} | |||
$(document).ready(function () { | |||
let url = window.location.href; | |||
let urlArr = url.split('/') | |||
userName = urlArr.slice(-5)[0] | |||
repoPath = urlArr.slice(-4)[0] | |||
jobID = urlArr.slice(-1)[0] | |||
}) | |||
// Keeps an event from bubbling up to ancestor elements.
// Prefers window.event when it is set and falls back to the event passed in.
function stopBubbling(e) {
    const evt = window.event || e;
    if (!evt.stopPropagation) {
        // IE compatibility: no stopPropagation available.
        evt.cancelBubble = true;
        return;
    }
    // Stop the event from propagating (bubbling).
    evt.stopPropagation();
}
// Fetches a slice of the train-job log and renders it into the
// `#log_file<version_name>` element. The "mask" element is shown while the
// request is in flight and hidden again on success or failure.
function loadLog(version_name) {
    const setMask = (display) => {
        document.getElementById("mask").style.display = display
    }
    // Reads the end_line input, substituting `fallback` when it is empty.
    const endLineOr = (fallback) => {
        const value = $('input[name=end_line]').val()
        return value == "" ? fallback : value
    }

    setMask("block")
    // NOTE(review): startLine is read from the end_line input as well, not
    // from start_line — confirm this is intended.
    const startLine = endLineOr(0)
    const endLine = endLineOr(50)

    const url = `/${userName}/${repoPath}/cloudbrain/train-job/${jobID}/get_log?endLine=${endLine}&startLine=${startLine}`
    $.get(url, (data) => {
        // Remember the returned range in the hidden inputs, then show the text.
        $('input[name=end_line]').val(data.EndLine)
        $('input[name=start_line]').val(data.StartLine)
        $(`#log_file${version_name}`).text(data.Content)
        setMask("none")
    }).fail(function (err) {
        console.log(err);
        setMask("none")
    });
}
// Refreshes the status shown for one job version and then reloads its log.
//
// version_name selects the version to query and is also used to build the ids
// of the elements that get updated. Request failures are only logged to the
// console.
function refreshStatus(version_name) {
    // The query string must use the version_name parameter; the previous
    // `versionname` identifier is not defined in this function.
    $.get(`/api/v1/repos/${userName}/${repoPath}/cloudbrain/${taskID}?version_name=${version_name}`, (data) => {
        // header status and duration
        //$(`#${version_name}-duration-span`).text(data.JobDuration)
        $(`#${version_name}-status-span span`).text(data.JobStatus)
        $(`#${version_name}-status-span i`).attr("class", data.JobStatus)
        // detail status and duration
        //$('#'+version_name+'-duration').text(data.JobDuration)
        $('#' + version_name + '-status').text(data.JobStatus)
        loadLog(version_name)
    }).fail(function (err) {
        console.log(err);
    });
    // The event is not passed in explicitly: take the first argument of the
    // function that called refreshStatus and stop it from bubbling.
    // NOTE(review): arguments.callee is not available in strict mode.
    stopBubbling(arguments.callee.caller.arguments[0])
}
function parseInfo() { | |||
let jsonValue = document.getElementById("json_value").value; | |||
@@ -238,11 +238,8 @@ | |||
<span> | |||
<div style="float: right;"> | |||
{{$.CsrfTokenHtml}} | |||
</div> | |||
<div class="ac-display-inblock title_text acc-margin-bottom"> | |||
<span class="cti-mgRight-sm">{{TimeSinceUnix1 .CreatedUnix}}</span> | |||
<span class="cti-mgRight-sm"> | |||
{{$.i18n.Tr "repo.modelarts.current_version"}}:{{.VersionName}}</span> | |||
@@ -260,7 +257,6 @@ | |||
<span class="refresh-status" data-tooltip="刷新" style="cursor: pointer;" data-inverted="" data-version="{{.VersionName}}"> | |||
<i class="redo icon redo-color"></i> | |||
</span> | |||
</div> | |||
<div style="float: right;"> | |||
{{if and ($.canDownload) (ne .Status "WAITING") ($.Permission.CanWrite $.UnitTypeModelManage) }} | |||
@@ -269,7 +265,6 @@ | |||
{{else}} | |||
<a class="ti-action-menu-item disabled" id="{{.VersionName}}-create-model">{{$.i18n.Tr "repo.modelarts.create_model"}}</a> | |||
{{end}} | |||
</div> | |||
</span> | |||
</span> | |||
@@ -282,6 +277,9 @@ | |||
<a class="active item" data-tab="first{{$k}}">{{$.i18n.Tr "repo.modelarts.train_job.config"}}</a> | |||
<a class="item log_bottom" data-tab="second{{$k}}" data-version="{{.VersionName}}">{{$.i18n.Tr "repo.modelarts.log"}}</a> | |||
{{ if eq $.Spec.ComputeResource "NPU"}} | |||
<a class="item metric_chart" data-tab="four{{$k}}" data-version="{{.VersionName}}" data-path="{{$.RepoRelPath}}/grampus/train-job/{{.JobID}}/metrics">{{$.i18n.Tr "cloudbrain.resource_use"}}</a> | |||
{{end}} | |||
<a class="item load-model-file" data-tab="third{{$k}}" data-download-flag="{{$.canDownload}}" data-path="{{$.RepoLink}}/modelarts/train-job/{{.JobID}}/model_list" data-version="{{.VersionName}}" data-parents="" data-filename="" data-init="init" >{{$.i18n.Tr "repo.model_download"}}</a> | |||
</div> | |||
<div class="ui tab active" data-tab="first{{$k}}"> | |||
@@ -564,6 +562,14 @@ | |||
</div> | |||
</div> | |||
<div class="ui tab" data-tab="four{{$k}}" style="position: relative;"> | |||
<i class="ri-refresh-line metric_chart" | |||
style="position: absolute;right: 25%;color:#3291f8;z-index:99;cursor: pointer;" | |||
data-version="{{.VersionName}}"></i> | |||
<div id="metric-{{.VersionName}}" style="height: 260px;width: 870px;"> | |||
</div> | |||
</div> | |||
<div class="ui tab" data-tab="third{{$k}}"> | |||
<input type="hidden" name="model{{.VersionName}}" value="-1"> | |||
@@ -321,7 +321,7 @@ | |||
data-tab="first{{$k}}">{{$.i18n.Tr "repo.modelarts.train_job.config"}}</a> | |||
<a class="item log_bottom" data-tab="second{{$k}}" | |||
data-version="{{.VersionName}}">{{$.i18n.Tr "repo.modelarts.log"}}</a> | |||
<a class="item metric_chart" data-tab="four{{$k}}" data-version="{{.VersionName}}">{{$.i18n.Tr "cloudbrain.resource_use"}}</a> | |||
<a class="item metric_chart" data-tab="four{{$k}}" data-version="{{.VersionName}}" data-path="{{$.RepoRelPath}}/modelarts/train-job/{{.JobID}}/metric_statistics?version_name={{.VersionName}}&statistic_type=each&metrics=">{{$.i18n.Tr "cloudbrain.resource_use"}}</a> | |||
<a class="item load-model-file" data-tab="third{{$k}}" data-download-flag="{{$.canDownload}}" data-path="{{$.RepoLink}}/modelarts/train-job/{{.JobID}}/model_list" data-version="{{.VersionName}}" data-parents="" data-filename="" data-init="init" >{{$.i18n.Tr "repo.model_download"}}</a> | |||
</div> | |||
<div class="ui tab active" data-tab="first{{$k}}"> | |||
@@ -5071,12 +5071,7 @@ function initcreateRepo() { | |||
initcreateRepo(); | |||
function initChartsNpu() { | |||
const url = window.location.href; | |||
const urlArr = url.split("/"); | |||
let userName = urlArr.slice(-5)[0]; | |||
let repoPath = urlArr.slice(-4)[0]; | |||
let jobID = urlArr.slice(-1)[0]; | |||
const repoPath = $('.metric_chart').data('path') | |||
let options = { | |||
legend: { | |||
data: [], | |||
@@ -5127,7 +5122,7 @@ function initChartsNpu() { | |||
document.getElementById(`metric-${versionName}`) | |||
); | |||
$.get( | |||
`${window.config.AppSubUrl}/api/v1/repos/${userName}/${repoPath}/modelarts/train-job/${jobID}/metric_statistics?version_name=${versionName}&statistic_type=each&metrics=`, | |||
`${window.config.AppSubUrl}/api/v1/repos/${repoPath}`, | |||
(res) => { | |||
let filterDta = res.MetricsInfo.filter((item) => { | |||
return ![ | |||