Browse Source

提交代码。

Signed-off-by: zouap <zouap@pcl.ac.cn>
tags/v1.22.7.1
zouap 3 years ago
parent
commit
3bf80e0cd8
5 changed files with 311 additions and 59 deletions
  1. +45
    -29
      models/ai_model_manage.go
  2. +1
    -0
      routers/api/v1/api.go
  3. +62
    -0
      routers/api/v1/repo/modelarts.go
  4. +46
    -27
      routers/repo/ai_model_convert.go
  5. +157
    -3
      templates/repo/modelmanage/convertshowinfo.tmpl

+ 45
- 29
models/ai_model_manage.go View File

@@ -41,35 +41,36 @@ type AiModelManage struct {
}

// AiModelConvert is the xorm-mapped record for a single model conversion task.
// The struct tags declare the primary key (ID), the indexed columns (Name,
// RepoId) and the nullability/defaults of the remaining columns.
type AiModelConvert struct {
ID string `xorm:"pk"`
Name string `xorm:"INDEX NOT NULL"`
Status string `xorm:"NULL"`
SrcEngine int `xorm:"NOT NULL DEFAULT 0"`
RepoId int64 `xorm:"INDEX NULL"`
ModelId string `xorm:"NOT NULL"`
ModelName string `xorm:"NULL"`
ModelVersion string `xorm:"NOT NULL"`
ModelPath string `xorm:"NULL"`
DestFormat int `xorm:"NOT NULL DEFAULT 0"`
NetOutputFormat int `xorm:"NULL"`
UserId int64 `xorm:"NOT NULL"`
CloudBrainTaskId string `xorm:"NULL"`
ContainerID string
ContainerIp string
RunTime int64 `xorm:"NULL"`
TrainJobDuration string
InputShape string `xorm:"varchar(2000)"`
InputDataFormat string `xorm:"NOT NULL"`
Description string `xorm:"varchar(2000)"`
Path string `xorm:"varchar(400) NOT NULL"`
CreatedUnix timeutil.TimeStamp `xorm:"created"`
UpdatedUnix timeutil.TimeStamp `xorm:"updated"`
StartTime timeutil.TimeStamp
EndTime timeutil.TimeStamp
UserName string
UserRelAvatarLink string
IsCanOper bool
IsCanDelete bool
ID string `xorm:"pk"`
Name string `xorm:"INDEX NOT NULL"`
Status string `xorm:"NULL"`
SrcEngine int `xorm:"NOT NULL DEFAULT 0"`
RepoId int64 `xorm:"INDEX NULL"`
ModelId string `xorm:"NOT NULL"`
ModelName string `xorm:"NULL"`
ModelVersion string `xorm:"NOT NULL"`
ModelPath string `xorm:"NULL"`
DestFormat int `xorm:"NOT NULL DEFAULT 0"`
NetOutputFormat int `xorm:"NULL"`
UserId int64 `xorm:"NOT NULL"`
CloudBrainTaskId string `xorm:"NULL"`
// ModelArtsVersionId is written together with CloudBrainTaskId by
// UpdateModelConvertModelArts (column model_arts_version_id).
ModelArtsVersionId string `xorm:"NULL"`
ContainerID string
ContainerIp string
RunTime int64 `xorm:"NULL"`
TrainJobDuration string
InputShape string `xorm:"varchar(2000)"`
InputDataFormat string `xorm:"NOT NULL"`
Description string `xorm:"varchar(2000)"`
Path string `xorm:"varchar(400) NOT NULL"`
CreatedUnix timeutil.TimeStamp `xorm:"created"`
UpdatedUnix timeutil.TimeStamp `xorm:"updated"`
StartTime timeutil.TimeStamp
EndTime timeutil.TimeStamp
UserName string
UserRelAvatarLink string
IsCanOper bool
IsCanDelete bool
}

type AiModelQueryOptions struct {
@@ -109,6 +110,21 @@ func ModelComputeAndSetDuration(task *AiModelConvert, result JobResultPayload) {
task.TrainJobDuration = ConvertDurationToStr(d)
}

func UpdateModelConvertModelArts(id string, CloudBrainTaskId string, VersionId string) error {
var sess *xorm.Session
sess = x.ID(id)
defer sess.Close()
re, err := sess.Cols("cloud_brain_task_id,model_arts_version_id").Update(&AiModelConvert{
CloudBrainTaskId: CloudBrainTaskId,
ModelArtsVersionId: VersionId,
})
if err != nil {
return err
}
log.Info("success to update cloud_brain_task_id from db.re=" + fmt.Sprint((re)))
return nil
}

func UpdateModelConvertCBTI(id string, CloudBrainTaskId string) error {
var sess *xorm.Session
sess = x.ID(id)


+ 1
- 0
routers/api/v1/api.go View File

@@ -908,6 +908,7 @@ func RegisterRoutes(m *macaron.Macaron) {
m.Group("/modelmanage", func() {
m.Get("/:id", repo.GetCloudbrainModelConvertTask)
m.Get("/:id/log", repo.CloudbrainForModelConvertGetLog)
m.Get("/:id/modelartlog", repo.TrainJobForModelConvertGetLog)
m.Get("/:id/model_list", repo.CloudBrainModelConvertList)
}, reqRepoReader(models.UnitTypeModelManage))
m.Group("/modelarts", func() {


+ 62
- 0
routers/api/v1/repo/modelarts.go View File

@@ -199,6 +199,68 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) {

}

// TrainJobForModelConvertGetLog is the API handler that returns a slice of the
// train-job log belonging to a model conversion task.
//
// It reads the task id from the ":id" route parameter and the "base_line",
// "order" and "lines" query parameters. "order" must equal modelarts.OrderDesc
// or modelarts.OrderAsc, otherwise a 400 response is written. On success the
// response is a JSON object with JobID, LogFileName, StartLine, EndLine,
// Content and Lines.
func TrainJobForModelConvertGetLog(ctx *context.APIContext) {
	jobID := ctx.Params(":id")
	baseLine := ctx.Query("base_line")
	order := ctx.Query("order")
	lines := ctx.Query("lines")

	lineCount, err := strconv.Atoi(lines)
	if err != nil {
		// Best effort: the request proceeds with lineCount == 0. Log the raw
		// query value, since the parsed int is always 0 on failure.
		log.Error("change lines(%s) string to int failed", lines)
	}

	if order != modelarts.OrderDesc && order != modelarts.OrderAsc {
		log.Error("order(%s) check failed", order)
		ctx.JSON(http.StatusBadRequest, map[string]interface{}{
			"err_msg": "order check failed",
		})
		return
	}

	resultLogFile, result, err := trainJobForModelConvertGetLogContent(jobID, baseLine, order, lineCount)
	if err != nil {
		log.Error("trainJobGetLog(%s) failed:%v", jobID, err.Error())
		// Always answer the request: returning without a body leaves the
		// caller with an empty success response it cannot interpret.
		ctx.JSON(http.StatusInternalServerError, map[string]interface{}{
			"err_msg": "get train job log failed",
		})
		return
	}

	logFileName := resultLogFile.LogFileList[0]
	ctx.Data["log_file_name"] = logFileName

	ctx.JSON(http.StatusOK, map[string]interface{}{
		"JobID":       jobID,
		"LogFileName": logFileName,
		"StartLine":   result.StartLine,
		"EndLine":     result.EndLine,
		"Content":     result.Content,
		"Lines":       result.Lines,
	})
}

// trainJobForModelConvertGetLogContent loads the model conversion task with
// the given id, asks modelarts for the task's log file names and then fetches
// the requested log slice from the first listed file.
//
// baseLine, order and lines are forwarded unchanged to modelarts.GetTrainJobLog.
// It returns the log file listing and the log slice, or the first error
// encountered (in which case both results are nil).
func trainJobForModelConvertGetLogContent(jobID string, baseLine string, order string, lines int) (*models.GetTrainJobLogFileNamesResult, *models.GetTrainJobLogResult, error) {
	task, err := models.QueryModelConvertById(jobID)
	if err != nil {
		log.Error("QueryModelConvertById(%s) failed:%v", jobID, err.Error())
		return nil, nil, err
	}

	resultLogFile, err := modelarts.GetTrainJobLogFileNames(task.CloudBrainTaskId, task.ModelArtsVersionId)
	if err != nil {
		log.Error("GetTrainJobLogFileNames(%s) failed:%v", task.CloudBrainTaskId, err.Error())
		return nil, nil, err
	}

	// NOTE(review): LogFileList[0] panics if the listing is empty; confirm
	// whether modelarts.GetTrainJobLogFileNames can return an empty list.
	result, err := modelarts.GetTrainJobLog(task.CloudBrainTaskId, task.ModelArtsVersionId, baseLine, resultLogFile.LogFileList[0], order, lines)
	if err != nil {
		log.Error("GetTrainJobLog(%s) failed:%v", task.CloudBrainTaskId, err.Error())
		return nil, nil, err
	}

	return resultLogFile, result, nil
}

func TrainJobGetLog(ctx *context.APIContext) {
var (
err error


+ 46
- 27
routers/repo/ai_model_convert.go View File

@@ -195,8 +195,8 @@ func createNpuTrainJob(modelConvert *models.AiModelConvert, ctx *context.Context
BranchName: DefaultBranchName,
}
result, err := modelarts.GenerateModelConvertTrainJob(req)
log.Info("jobId=" + fmt.Sprint(result.JobID))
models.UpdateModelConvertCBTI(modelConvert.ID, fmt.Sprint(result.JobID))
log.Info("jobId=" + fmt.Sprint(result.JobID) + " versionid=" + fmt.Sprint(result.VersionID))
models.UpdateModelConvertModelArts(modelConvert.ID, fmt.Sprint(result.JobID), fmt.Sprint(result.VersionID))
}

func downloadConvertCode(repopath string, codePath, branchName string) error {
@@ -368,6 +368,13 @@ func DeleteModelConvert(ctx *context.Context) {
}
}

// isCloudBrainTask reports whether the conversion task's source engine is
// PYTORCH_ENGINE. ShowModelConvertInfo uses the result to choose between the
// cloudbrain job lookup and the NPU display branch.
func isCloudBrainTask(task *models.AiModelConvert) bool {
	return task.SrcEngine == PYTORCH_ENGINE
}

func StopModelConvert(ctx *context.Context) {
id := ctx.Params(":id")
log.Info("stop model convert start.id=" + id)
@@ -384,35 +391,47 @@ func ShowModelConvertInfo(ctx *context.Context) {
if err == nil {
ctx.Data["task"] = job
}
result, err := cloudbrain.GetJob(job.CloudBrainTaskId)
if err != nil {
log.Info("error:" + err.Error())
ctx.Data["error"] = err.Error()
return
}
if result != nil {
jobRes, _ := models.ConvertToJobResultPayload(result.Payload)
ctx.Data["result"] = jobRes
taskRoles := jobRes.TaskRoles
taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{}))
ctx.Data["taskRes"] = taskRes
ctx.Data["ExitDiagnostics"] = taskRes.TaskStatuses[0].ExitDiagnostics

job.Status = jobRes.JobStatus.State

if jobRes.JobStatus.State != string(models.JobWaiting) && jobRes.JobStatus.State != string(models.JobFailed) {
job.ContainerIp = taskRes.TaskStatuses[0].ContainerIP
job.ContainerID = taskRes.TaskStatuses[0].ContainerID
job.Status = taskRes.TaskStatuses[0].State
if isCloudBrainTask(job) {
ctx.Data["npu_display"] = "none"
ctx.Data["gpu_display"] = "block"
result, err := cloudbrain.GetJob(job.CloudBrainTaskId)
if err != nil {
log.Info("error:" + err.Error())
ctx.Data["error"] = err.Error()
return
}
if jobRes.JobStatus.State != string(models.JobWaiting) {
models.ModelComputeAndSetDuration(job, jobRes)
err = models.UpdateModelConvert(job)
if err != nil {
log.Error("UpdateModelConvert failed:", err)
if result != nil {
jobRes, _ := models.ConvertToJobResultPayload(result.Payload)
ctx.Data["result"] = jobRes
taskRoles := jobRes.TaskRoles
taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{}))
ctx.Data["taskRes"] = taskRes
ctx.Data["ExitDiagnostics"] = taskRes.TaskStatuses[0].ExitDiagnostics
ctx.Data["AppExitDiagnostics"] = jobRes.JobStatus.AppExitDiagnostics

job.Status = jobRes.JobStatus.State

if jobRes.JobStatus.State != string(models.JobWaiting) && jobRes.JobStatus.State != string(models.JobFailed) {
job.ContainerIp = taskRes.TaskStatuses[0].ContainerIP
job.ContainerID = taskRes.TaskStatuses[0].ContainerID
job.Status = taskRes.TaskStatuses[0].State
}
if jobRes.JobStatus.State != string(models.JobWaiting) {
models.ModelComputeAndSetDuration(job, jobRes)
err = models.UpdateModelConvert(job)
if err != nil {
log.Error("UpdateModelConvert failed:", err)
}
}
}
} else {
ctx.Data["npu_display"] = "block"
ctx.Data["gpu_display"] = "none"
ctx.Data["ExitDiagnostics"] = ""
ctx.Data["AppExitDiagnostics"] = ""

}

ctx.HTML(200, tplModelConvertInfo)
}



+ 157
- 3
templates/repo/modelmanage/convertshowinfo.tmpl View File

@@ -215,8 +215,9 @@ td, th {
<div class="content-pad">
<div class="ui pointing secondary menu" style="border-bottom: 1px solid rgba(34,36,38,.15);">
<a class="active item" data-tab="first">{{$.i18n.Tr "repo.modelarts.train_job.config"}}</a>
<a class="item" data-tab="second" onclick="javascript:parseInfo()">{{$.i18n.Tr "repo.cloudbrain.runinfo"}}</a>
<a class="item" data-tab="third" onclick="loadLog()">{{$.i18n.Tr "repo.modelarts.log"}}</a>
<a id="gpuruntimeinfo" style="display: {{$.gpu_display}};" class="item" data-tab="second" onclick="javascript:parseInfo()">{{$.i18n.Tr "repo.cloudbrain.runinfo"}}</a>
<a id="gpulog" style="display: {{$.gpu_display}};" class="item" data-tab="third" onclick="loadLog()">{{$.i18n.Tr "repo.modelarts.log"}}</a>
<a id="npulog" style="display: {{$.npu_display}};" class="item" data-tab="five" onclick="loadLog()">{{$.i18n.Tr "repo.modelarts.log"}}</a>
<a class="item" data-tab="four" onclick="loadModelFile()">{{$.i18n.Tr "repo.model_download"}}</a>
</div>
<div class="ui tab active" data-tab="first">
@@ -417,7 +418,7 @@ td, th {
<div id="header"></div>
</div>
<div class="ui attached log" id="log" style="height: 390px !important; overflow: auto;">
<input type="hidden" id="json_value" value="{{$.result.JobStatus.AppExitDiagnostics}}">
<input type="hidden" id="json_value" value="{{$.AppExitDiagnostics}}">
<input type="hidden" id="ExitDiagnostics" value="{{$.ExitDiagnostics}}">
<span id="info_display" class="info_text">
@@ -443,6 +444,25 @@ td, th {
</div>

<div class="ui tab" data-tab="five">
<div style="position: relative;">
<span>
<a title="滚动到顶部" style="position: absolute; right: -32px;cursor: pointer;" class="log_top" data-version="V0001"><i class="icon-to-top"></i></a>
</span>
<span>
<a title="滚动到底部" style="position: absolute; bottom: 10px;right: -32px;cursor: pointer;" class="log_bottom" data-version="V0001"><i class="icon-to-bottom"></i></a>
</span>
<div id="log_npu_message" class="ui message message" style="display: none;">
<div id="log_npu_header"></div>
</div>
<div class="ui attached log" onscroll="fn()" id="log_npu" style="height: 300px !important; overflow: auto;">
<input type="hidden" name="end_line" value>
<input type="hidden" name="start_line" value>
<pre id="log_file_npu"></pre>
</div>
</div>
</div>

<div class="ui tab" data-tab="four">
<input type="hidden" name="model" value="-1">
<input type="hidden" name="modelback" value="-1">
@@ -455,6 +475,8 @@ td, th {
</div>
</div>

</div>
</div>
@@ -712,4 +734,136 @@ td, th {
document.getElementById("info_display").innerHTML=html;
}
// Returns a wrapper around fn that postpones the call until `delay`
// milliseconds have passed since the wrapper was last invoked. Each new
// invocation cancels the previously scheduled call; fn receives the arguments
// of the most recent invocation.
function debounce(fn, delay) {
    let pending;
    return (...callArgs) => {
        // Drop any call that is still waiting; clearTimeout ignores undefined.
        clearTimeout(pending);
        // Schedule a fresh call with the latest arguments.
        pending = setTimeout(() => fn.apply(this, callArgs), delay);
    };
}
const fn = debounce(logScroll, 500)
// Scroll handler for the NPU log pane (#log_npu).
// When the pane is scrolled to (within one pixel of) the bottom it requests the
// next 50 lines after the stored end_line and appends them; when scrollTop is
// one of 0..10 it requests the 50 lines before the stored start_line and
// prepends them. A temporary notice is shown when the server reports no more
// lines. version_name is accepted for signature compatibility but is not used.
function logScroll(version_name) {
    let container = document.querySelector(`#log_npu`)
    let scrollTop = container.scrollTop
    let scrollHeight = container.scrollHeight
    let clientHeight = container.clientHeight
    let scrollLeft = container.scrollLeft
    // Bottom reached (with a +/-1 px tolerance), not at the very top, and no horizontal scroll.
    if(((parseInt(scrollTop) + clientHeight == scrollHeight || parseInt(scrollTop) + clientHeight +1 == scrollHeight || parseInt(scrollTop) + clientHeight - 1 == scrollHeight)) && parseInt(scrollTop)!==0 && scrollLeft==0){
        let end_line = $(`#log_npu input[name=end_line]`).val()
        $.get(`/api/v1/repos/${userName}/${repoPath}/modelmanage/${jobID}/modelartlog?version_name=V0001&base_line=${end_line}&lines=50&order=desc`, (data) => {
            if (data.Lines == 0){
                $(`#log_npu_header`).text('您已翻阅至日志底部')
                $(`#log_npu_message`).css('display', 'block')
                setTimeout(function(){
                    $(`#log_npu_message`).css('display', 'none')
                }, 1000)
            }else{
                if(end_line===data.EndLine){
                    return
                }
                else{
                    $(`#log_npu input[name=end_line]`).val(data.EndLine)
                    // Insert the log as text so markup inside the log is not interpreted as HTML.
                    $(`#log_npu`).append($('<pre>').text(data.Content))
                }

            }
        }).fail(function(err) {
            console.log(err);
        });
    }
    // Near the top of the pane and no horizontal scroll: load earlier lines.
    if([0,1,2,3,4,5,6,7,8,9,10].includes(scrollTop) && scrollLeft==0){
        let start_line = $(`#log_npu input[name=start_line]`).val()
        $.get(`/api/v1/repos/${userName}/${repoPath}/modelmanage/${jobID}/modelartlog?version_name=V0001&base_line=${start_line}&lines=50&order=asc`, (data) => {
            if (data.Lines == 0){
                $(`#log_npu_header`).text('您已翻阅至日志顶部')
                $(`#log_npu_message`).css('display', 'block')
                setTimeout(function(){
                    $(`#log_npu_message`).css('display', 'none')
                }, 1000)
            }else{
                $(`#log_npu input[name=start_line]`).val(data.StartLine) // keep the stored start line in sync with the response
                // Insert the log as text so markup inside the log is not interpreted as HTML.
                $(`#log_npu`).prepend($('<pre>').text(data.Content))
            }
        }).fail(function(err) {
            console.log(err);
        });
    }
}
// Animates dom's vertical scroll position from currentY towards targetY.
// Each step (scheduled 1 ms later) moves one tenth of the remaining distance,
// rounded up; once the remaining distance is within 10 pixels the element is
// moved straight to targetY. currentX is only applied to the first step, as
// the follow-up steps are scheduled without it.
function scrollAnimation(dom, currentY, targetY, currentX) {
    const remaining = targetY - currentY;
    setTimeout(() => {
        // Advance by a tenth of the distance that is still left.
        const step = Math.ceil(remaining / 10);
        const nextY = currentY + step;
        dom.scrollTo(currentX || 0, nextY,'smooth');

        // Close enough: jump to the target and stop scheduling further steps.
        if (remaining <= 10 && remaining >= -10) {
            dom.scrollTo(0, targetY,'smooth')
            return;
        }
        scrollAnimation(dom, nextY, targetY)
    }, 1)
}

// Click handler for the "scroll to top" control of the NPU log pane.
// Removes the previously loaded log chunks, loads the first 50 lines of the
// log, records their start/end line in the hidden inputs, shows the
// "top reached" notice for one second and scrolls the pane to the top.
$('.log_top').click(function(){

    let logContentDom = document.querySelector(`#log_npu`)
    $(`#log_file_npu`).siblings('pre').remove()
    $.get(`/api/v1/repos/${userName}/${repoPath}/modelmanage/${jobID}/modelartlog?version_name=V0001&base_line=&lines=50&order=asc`, (data) => {
        $(`#log_npu input[name=end_line]`).val(data.EndLine) // keep the stored line markers in sync with the response
        $(`#log_npu input[name=start_line]`).val(data.StartLine)
        // Insert the log as text so markup inside the log is not interpreted as HTML.
        $(`#log_npu`).prepend($('<pre>').text(data.Content))
        $(`#log_npu_header`).text('您已翻阅至日志顶部')
        $(`#log_npu_message`).css('display', 'block')
        setTimeout(function(){
            $(`#log_npu_message`).css('display', 'none')
        }, 1000)
        scrollAnimation(logContentDom, logContentDom.scrollTop, 0);
    })

})
// Click handler for the "scroll to bottom" control of the NPU log pane.
// Removes the previously loaded log chunks, loads the last 50 lines of the
// log, records their start/end line in the hidden inputs, then issues one
// follow-up request based on the returned end line: if it yields no lines the
// "bottom reached" notice is shown for one second, otherwise the extra lines
// are appended. Finally the pane is scrolled to the bottom.
$('.log_bottom').click(function(e){

    let logContentDom = document.querySelector(`#log_npu`)
    $(`#log_file_npu`).siblings('pre').remove()
    $.get(`/api/v1/repos/${userName}/${repoPath}/modelmanage/${jobID}/modelartlog?version_name=V0001&base_line=&lines=50&order=desc`, (data) => {
        $(`#log_npu input[name=end_line]`).val(data.EndLine) // keep the stored line markers in sync with the response
        $(`#log_npu input[name=start_line]`).val(data.StartLine)
        // Insert the log as text so markup inside the log is not interpreted as HTML.
        $(`#log_npu`).append($('<pre>').text(data.Content))
        // End line of the chunk just shown; the follow-up callback compares
        // against it (it was previously read from an undeclared variable).
        const end_line = data.EndLine
        $.get(`/api/v1/repos/${userName}/${repoPath}/modelmanage/${jobID}/modelartlog?version_name=V0001&base_line=${end_line}&lines=50&order=desc`, (more) => {
            if (more.Lines == 0){
                $(`#log_npu_header`).text('您已翻阅至日志底部')
                $(`#log_npu_message`).css('display', 'block')
                setTimeout(function(){
                    $(`#log_npu_message`).css('display', 'none')
                }, 1000)
            }else{
                // Nothing new beyond what is already displayed.
                if(end_line===more.EndLine){
                    return
                }
                else{
                    $(`#log_npu input[name=end_line]`).val(more.EndLine)
                    $(`#log_npu`).append($('<pre>').text(more.Content))
                }

            }
        }).fail(function(err) {
            console.log(err);
        });
        scrollAnimation(logContentDom, logContentDom.scrollTop+1, logContentDom.scrollHeight - logContentDom.clientHeight);
    })
})
</script>

Loading…
Cancel
Save