diff --git a/models/ai_model_manage.go b/models/ai_model_manage.go
index 115b5b485..533c12987 100644
--- a/models/ai_model_manage.go
+++ b/models/ai_model_manage.go
@@ -41,35 +41,36 @@ type AiModelManage struct {
 }
 
 type AiModelConvert struct {
-	ID                string `xorm:"pk"`
-	Name              string `xorm:"INDEX NOT NULL"`
-	Status            string `xorm:"NULL"`
-	SrcEngine         int    `xorm:"NOT NULL DEFAULT 0"`
-	RepoId            int64  `xorm:"INDEX NULL"`
-	ModelId           string `xorm:"NOT NULL"`
-	ModelName         string `xorm:"NULL"`
-	ModelVersion      string `xorm:"NOT NULL"`
-	ModelPath         string `xorm:"NULL"`
-	DestFormat        int    `xorm:"NOT NULL DEFAULT 0"`
-	NetOutputFormat   int    `xorm:"NULL"`
-	UserId            int64  `xorm:"NOT NULL"`
-	CloudBrainTaskId  string `xorm:"NULL"`
-	ContainerID       string
-	ContainerIp       string
-	RunTime           int64 `xorm:"NULL"`
-	TrainJobDuration  string
-	InputShape        string             `xorm:"varchar(2000)"`
-	InputDataFormat   string             `xorm:"NOT NULL"`
-	Description       string             `xorm:"varchar(2000)"`
-	Path              string             `xorm:"varchar(400) NOT NULL"`
-	CreatedUnix       timeutil.TimeStamp `xorm:"created"`
-	UpdatedUnix       timeutil.TimeStamp `xorm:"updated"`
-	StartTime         timeutil.TimeStamp
-	EndTime           timeutil.TimeStamp
-	UserName          string
-	UserRelAvatarLink string
-	IsCanOper         bool
-	IsCanDelete       bool
+	ID                 string `xorm:"pk"`
+	Name               string `xorm:"INDEX NOT NULL"`
+	Status             string `xorm:"NULL"`
+	SrcEngine          int    `xorm:"NOT NULL DEFAULT 0"`
+	RepoId             int64  `xorm:"INDEX NULL"`
+	ModelId            string `xorm:"NOT NULL"`
+	ModelName          string `xorm:"NULL"`
+	ModelVersion       string `xorm:"NOT NULL"`
+	ModelPath          string `xorm:"NULL"`
+	DestFormat         int    `xorm:"NOT NULL DEFAULT 0"`
+	NetOutputFormat    int    `xorm:"NULL"`
+	UserId             int64  `xorm:"NOT NULL"`
+	CloudBrainTaskId   string `xorm:"NULL"`
+	ModelArtsVersionId string `xorm:"NULL"` // written by UpdateModelConvertModelArts
+	ContainerID        string
+	ContainerIp        string
+	RunTime            int64 `xorm:"NULL"`
+	TrainJobDuration   string
+	InputShape         string             `xorm:"varchar(2000)"`
+	InputDataFormat    string             `xorm:"NOT NULL"`
+	Description        string             `xorm:"varchar(2000)"`
+	Path               string             `xorm:"varchar(400) NOT NULL"`
+	CreatedUnix        timeutil.TimeStamp `xorm:"created"`
+	UpdatedUnix        timeutil.TimeStamp `xorm:"updated"`
+	StartTime          timeutil.TimeStamp
+	EndTime            timeutil.TimeStamp
+	UserName           string
+	UserRelAvatarLink  string
+	IsCanOper          bool
+	IsCanDelete        bool
 }
 
 type AiModelQueryOptions struct {
@@ -109,6 +110,23 @@ func ModelComputeAndSetDuration(task *AiModelConvert, result JobResultPayload) {
 	task.TrainJobDuration = ConvertDurationToStr(d)
 }
 
+// UpdateModelConvertModelArts stores CloudBrainTaskId and VersionId on the
+// model-convert record with the given id. Only the cloud_brain_task_id and
+// model_arts_version_id columns are written.
+func UpdateModelConvertModelArts(id string, CloudBrainTaskId string, VersionId string) error {
+	sess := x.ID(id)
+	defer sess.Close()
+	re, err := sess.Cols("cloud_brain_task_id,model_arts_version_id").Update(&AiModelConvert{
+		CloudBrainTaskId:   CloudBrainTaskId,
+		ModelArtsVersionId: VersionId,
+	})
+	if err != nil {
+		return err
+	}
+	log.Info("success to update cloud_brain_task_id from db.re=" + fmt.Sprint(re))
+	return nil
+}
+
 func UpdateModelConvertCBTI(id string, CloudBrainTaskId string) error {
 	var sess *xorm.Session
 	sess = x.ID(id)
diff --git a/routers/api/v1/api.go b/routers/api/v1/api.go
index 601b79f18..3df239e1a 100755
--- a/routers/api/v1/api.go
+++ b/routers/api/v1/api.go
@@ -908,6 +908,7 @@ func RegisterRoutes(m *macaron.Macaron) {
 			m.Group("/modelmanage", func() {
 				m.Get("/:id", repo.GetCloudbrainModelConvertTask)
 				m.Get("/:id/log", repo.CloudbrainForModelConvertGetLog)
+				m.Get("/:id/modelartlog", repo.TrainJobForModelConvertGetLog)
 				m.Get("/:id/model_list", repo.CloudBrainModelConvertList)
 			}, reqRepoReader(models.UnitTypeModelManage))
 			m.Group("/modelarts", func() {
diff --git a/routers/api/v1/repo/modelarts.go b/routers/api/v1/repo/modelarts.go
index 9e4edea03..82fe4e112 100755
--- a/routers/api/v1/repo/modelarts.go
+++ b/routers/api/v1/repo/modelarts.go
@@ -199,6 +199,92 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) {
 
 }
 
+// TrainJobForModelConvertGetLog serves one page of the ModelArts log of the
+// model-convert task named by the ":id" route parameter as JSON.
+//
+// It reads the query parameters "base_line", "order" (which must equal
+// modelarts.OrderDesc or modelarts.OrderAsc, otherwise 400 is returned) and
+// "lines". A failed lookup or log fetch is answered with a 500 and "err_msg".
+func TrainJobForModelConvertGetLog(ctx *context.APIContext) {
+	jobID := ctx.Params(":id")
+	baseLine := ctx.Query("base_line")
+	order := ctx.Query("order")
+	lines := ctx.Query("lines")
+
+	// Best effort: an unparsable "lines" is logged and then treated as 0.
+	lineCount, err := strconv.Atoi(lines)
+	if err != nil {
+		log.Error("change lines(%s) string to int failed:%v", lines, err)
+	}
+
+	if order != modelarts.OrderDesc && order != modelarts.OrderAsc {
+		log.Error("order(%s) check failed", order)
+		ctx.JSON(http.StatusBadRequest, map[string]interface{}{
+			"err_msg": "order check failed",
+		})
+		return
+	}
+
+	resultLogFile, result, err := trainJobForModelConvertGetLogContent(jobID, baseLine, order, lineCount)
+	if err != nil {
+		log.Error("trainJobForModelConvertGetLogContent(%s) failed:%v", jobID, err.Error())
+		// Answer with an error body; returning silently would send an empty
+		// success response that the client cannot tell from real data.
+		ctx.JSON(http.StatusInternalServerError, map[string]interface{}{
+			"err_msg": err.Error(),
+		})
+		return
+	}
+
+	// LogFileList can be empty, so never index it unconditionally.
+	logFileName := ""
+	if len(resultLogFile.LogFileList) > 0 {
+		logFileName = resultLogFile.LogFileList[0]
+	}
+	ctx.Data["log_file_name"] = logFileName
+
+	ctx.JSON(http.StatusOK, map[string]interface{}{
+		"JobID":       jobID,
+		"LogFileName": logFileName,
+		"StartLine":   result.StartLine,
+		"EndLine":     result.EndLine,
+		"Content":     result.Content,
+		"Lines":       result.Lines,
+	})
+}
+
+// trainJobForModelConvertGetLogContent loads the model-convert task jobID and
+// fetches a page of its ModelArts log from the first file reported by
+// modelarts.GetTrainJobLogFileNames.
+//
+// If no log file is reported it returns the file-name result together with a
+// zero-valued log result and a nil error instead of indexing an empty list.
+func trainJobForModelConvertGetLogContent(jobID string, baseLine string, order string, lines int) (*models.GetTrainJobLogFileNamesResult, *models.GetTrainJobLogResult, error) {
+	task, err := models.QueryModelConvertById(jobID)
+	if err != nil {
+		log.Error("QueryModelConvertById(%s) failed:%v", jobID, err.Error())
+		return nil, nil, err
+	}
+
+	resultLogFile, err := modelarts.GetTrainJobLogFileNames(task.CloudBrainTaskId, task.ModelArtsVersionId)
+	if err != nil {
+		log.Error("GetTrainJobLogFileNames(%s) failed:%v", task.CloudBrainTaskId, err.Error())
+		return nil, nil, err
+	}
+	if len(resultLogFile.LogFileList) == 0 {
+		log.Error("GetTrainJobLogFileNames(%s) returned no log file", task.CloudBrainTaskId)
+		return resultLogFile, &models.GetTrainJobLogResult{}, nil
+	}
+
+	result, err := modelarts.GetTrainJobLog(task.CloudBrainTaskId, task.ModelArtsVersionId, baseLine, resultLogFile.LogFileList[0], order, lines)
+	if err != nil {
+		log.Error("GetTrainJobLog(%s) failed:%v", task.CloudBrainTaskId, err.Error())
+		return nil, nil, err
+	}
+
+	return resultLogFile, result, nil
+}
+
 func TrainJobGetLog(ctx *context.APIContext) {
 	var (
 		err error
diff --git a/routers/repo/ai_model_convert.go b/routers/repo/ai_model_convert.go
index 904bea36d..60eff8c79 100644
--- a/routers/repo/ai_model_convert.go
+++ b/routers/repo/ai_model_convert.go
@@ -195,8 +195,15 @@ func createNpuTrainJob(modelConvert *models.AiModelConvert, ctx *context.Context
 		BranchName: DefaultBranchName,
 	}
 	result, err := modelarts.GenerateModelConvertTrainJob(req)
-	log.Info("jobId=" + fmt.Sprint(result.JobID))
-	models.UpdateModelConvertCBTI(modelConvert.ID, fmt.Sprint(result.JobID))
+	// result must not be dereferenced when the job could not be created.
+	if err != nil {
+		log.Error("GenerateModelConvertTrainJob(%s) failed: %v", modelConvert.ID, err)
+		return
+	}
+	log.Info("jobId=" + fmt.Sprint(result.JobID) + " versionid=" + fmt.Sprint(result.VersionID))
+	if err = models.UpdateModelConvertModelArts(modelConvert.ID, fmt.Sprint(result.JobID), fmt.Sprint(result.VersionID)); err != nil {
+		log.Error("UpdateModelConvertModelArts(%s) failed: %v", modelConvert.ID, err)
+	}
 }
 
 func downloadConvertCode(repopath string, codePath, branchName string) error {
@@ -368,6 +375,12 @@ func DeleteModelConvert(ctx *context.Context) {
 	}
 }
 
+// isCloudBrainTask reports whether task's source engine is PYTORCH_ENGINE;
+// ShowModelConvertInfo queries cloudbrain.GetJob only for such tasks.
+func isCloudBrainTask(task *models.AiModelConvert) bool {
+	return task.SrcEngine == PYTORCH_ENGINE
+}
+
 func StopModelConvert(ctx *context.Context) {
 	id := ctx.Params(":id")
 	log.Info("stop model convert start.id=" + id)
@@ -384,35 +397,47 @@ func ShowModelConvertInfo(ctx *context.Context) {
 	if err == nil {
 		ctx.Data["task"] = job
 	}
-	result, err := cloudbrain.GetJob(job.CloudBrainTaskId)
-	if err != nil {
-		log.Info("error:" + err.Error())
-		ctx.Data["error"] = err.Error()
-		return
-	}
-	if result != nil {
-		jobRes, _ := models.ConvertToJobResultPayload(result.Payload)
-		ctx.Data["result"] = jobRes
-		taskRoles := jobRes.TaskRoles
-		taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{}))
-		ctx.Data["taskRes"] = taskRes
-		ctx.Data["ExitDiagnostics"] = taskRes.TaskStatuses[0].ExitDiagnostics
-
-		job.Status = jobRes.JobStatus.State
-
-		if jobRes.JobStatus.State != string(models.JobWaiting) && jobRes.JobStatus.State != string(models.JobFailed) {
-			job.ContainerIp = taskRes.TaskStatuses[0].ContainerIP
-			job.ContainerID = taskRes.TaskStatuses[0].ContainerID
-			job.Status = taskRes.TaskStatuses[0].State
+	if isCloudBrainTask(job) {
+		ctx.Data["npu_display"] = "none"
+		ctx.Data["gpu_display"] = "block"
+		result, err := cloudbrain.GetJob(job.CloudBrainTaskId)
+		if err != nil {
+			log.Info("error:" + err.Error())
+			ctx.Data["error"] = err.Error()
+			return
 		}
-		if jobRes.JobStatus.State != string(models.JobWaiting) {
-			models.ModelComputeAndSetDuration(job, jobRes)
-			err = models.UpdateModelConvert(job)
-			if err != nil {
-				log.Error("UpdateModelConvert failed:", err)
+		if result != nil {
+			jobRes, _ := models.ConvertToJobResultPayload(result.Payload)
+			ctx.Data["result"] = jobRes
+			taskRoles := jobRes.TaskRoles
+			taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{}))
+			ctx.Data["taskRes"] = taskRes
+			ctx.Data["ExitDiagnostics"] = taskRes.TaskStatuses[0].ExitDiagnostics
+			ctx.Data["AppExitDiagnostics"] = jobRes.JobStatus.AppExitDiagnostics
+
+			job.Status = jobRes.JobStatus.State
+
+			if jobRes.JobStatus.State != string(models.JobWaiting) && jobRes.JobStatus.State != string(models.JobFailed) {
+				job.ContainerIp = taskRes.TaskStatuses[0].ContainerIP
+				job.ContainerID = taskRes.TaskStatuses[0].ContainerID
+				job.Status = taskRes.TaskStatuses[0].State
+			}
+			if jobRes.JobStatus.State != string(models.JobWaiting) {
+				models.ModelComputeAndSetDuration(job, jobRes)
+				err = models.UpdateModelConvert(job)
+				if err != nil {
+					log.Error("UpdateModelConvert failed:", err)
+				}
 			}
 		}
+	} else {
+		ctx.Data["npu_display"] = "block"
+		ctx.Data["gpu_display"] = "none"
+		ctx.Data["ExitDiagnostics"] = ""
+		ctx.Data["AppExitDiagnostics"] = ""
+
 	}
+
 	ctx.HTML(200, tplModelConvertInfo)
 }
 
diff --git a/templates/repo/modelmanage/convertshowinfo.tmpl b/templates/repo/modelmanage/convertshowinfo.tmpl
index ba790e354..a1f1f0c85 100644
--- a/templates/repo/modelmanage/convertshowinfo.tmpl
+++ b/templates/repo/modelmanage/convertshowinfo.tmpl
@@ -215,8 +215,9 @@ td, th {
@@ -417,7 +418,7 @@ td, th {
- + @@ -443,6 +444,25 @@ td, th {
+
+
+ + + + + + + +
+ + +

+                            
+
+
+
@@ -455,6 +475,8 @@ td, th {
+
+
@@ -712,4 +734,145 @@ td, th {
 
         document.getElementById("info_display").innerHTML=html;
     }
+    // Returns a wrapper that postpones calling fn until `delay` ms have passed
+    // without another call; every new call restarts the wait.
+    function debounce(fn,delay){
+        let timer;
+        return (...args) => {
+            // Cancel the pending call, if any.
+            if (timer) {
+                clearTimeout(timer);
+            }
+
+            // Schedule a fresh call with the latest arguments.
+            timer = setTimeout(() => {
+                fn.apply(this, args);
+            }, delay);
+        };
+    }
+    const fn = debounce(logScroll, 500)
+    // Scroll handler for the #log_npu pane: when the pane is scrolled to its bottom it
+    // appends 50 more lines (order=desc from the stored end_line); within 10px of the top
+    // it prepends 50 lines (order=asc from start_line). version_name is not used.
+    function logScroll(version_name) {
+        let container = document.querySelector(`#log_npu`)
+        let scrollTop = container.scrollTop
+        let scrollHeight = container.scrollHeight
+        let clientHeight = container.clientHeight
+        let scrollLeft = container.scrollLeft
+        // Allow one pixel of slack in either direction when testing for the bottom.
+        let atBottom = Math.abs(parseInt(scrollTop) + clientHeight - scrollHeight) <= 1
+        if(atBottom && parseInt(scrollTop)!==0 && scrollLeft==0){
+            let end_line = $(`#log_npu input[name=end_line]`).val()
+            $.get(`/api/v1/repos/${userName}/${repoPath}/modelmanage/${jobID}/modelartlog?version_name=V0001&base_line=${end_line}&lines=50&order=desc`, (data) => {
+                if (data.Lines == 0){
+                    $(`#log_npu_header`).text('您已翻阅至日志底部')
+                    $(`#log_npu_message`).css('display', 'block')
+                    setTimeout(function(){
+                        $(`#log_npu_message`).css('display', 'none')
+                    }, 1000)
+                }else{
+                    // .val() yields a string, so compare as strings in case EndLine is numeric.
+                    if(end_line===String(data.EndLine)){
+                        return
+                    }
+                    else{
+                        $(`#log_npu input[name=end_line]`).val(data.EndLine)
+                        $(`#log_npu`).append('<pre>' + data.Content)   // NOTE(review): '<pre>' assumed from .siblings('pre') — confirm
+                    }
+
+                }
+            }).fail(function(err) {
+                console.log(err);
+            });
+        }
+        // A range test also matches fractional offsets, which an integer list would miss.
+        if(scrollTop >= 0 && scrollTop <= 10 && scrollLeft==0){
+            let start_line = $(`#log_npu input[name=start_line]`).val()
+            $.get(`/api/v1/repos/${userName}/${repoPath}/modelmanage/${jobID}/modelartlog?version_name=V0001&base_line=${start_line}&lines=50&order=asc`, (data) => {
+                if (data.Lines == 0){
+                    $(`#log_npu_header`).text('您已翻阅至日志顶部')
+                    $(`#log_npu_message`).css('display', 'block')
+                    setTimeout(function(){
+                        $(`#log_npu_message`).css('display', 'none')
+                    }, 1000)
+                }else{
+                    $(`#log_npu input[name=start_line]`).val(data.StartLine)   // keep the stored start line in sync
+                    $(`#log_npu`).prepend('<pre>' + data.Content)   // NOTE(review): '<pre>' assumed from .siblings('pre') — confirm
+                }
+            }).fail(function(err) {
+                console.log(err);
+            });
+        }
+    }
+    // Animates dom's vertical scroll from currentY to targetY, moving a tenth of
+    // the remaining distance every 1 ms tick; currentX (default 0) is held fixed.
+    function scrollAnimation(dom, currentY, targetY, currentX) {
+        const x = currentX || 0;
+        const remaining = targetY - currentY;
+        setTimeout(() => {
+            // Step by one tenth of what is left, rounded up.
+            const nextY = currentY + Math.ceil(remaining / 10);
+            // scrollTo takes (x, y) only, so no third argument is passed.
+            dom.scrollTo(x, nextY);
+            if (remaining > 10 || remaining < -10) {
+                // Still far away: recurse, keeping the horizontal offset.
+                scrollAnimation(dom, nextY, targetY, x)
+            } else {
+                dom.scrollTo(x, targetY)   // within 10px: snap to the target
+            }
+        }, 1)
+    }
+
+    // .log_top click: requests 50 lines (order=asc, no base_line), shows them in #log_npu, scrolls to the top.
+    $('.log_top').click(function(){
+        let logContentDom = document.querySelector(`#log_npu`)
+        $.get(`/api/v1/repos/${userName}/${repoPath}/modelmanage/${jobID}/modelartlog?version_name=V0001&base_line=&lines=50&order=asc`, (data) => {
+            // Clear the old lines only after the new page arrived, so a failed request keeps them.
+            $(`#log_file_npu`).siblings('pre').remove()
+            $(`#log_npu input[name=end_line]`).val(data.EndLine)   // remember the range that is now displayed
+            $(`#log_npu input[name=start_line]`).val(data.StartLine)
+            $(`#log_npu`).prepend('<pre>' + data.Content)   // NOTE(review): '<pre>' assumed from .siblings('pre') — confirm
+            $(`#log_npu_header`).text('您已翻阅至日志顶部')
+            $(`#log_npu_message`).css('display', 'block')
+            setTimeout(function(){
+                $(`#log_npu_message`).css('display', 'none')
+            }, 1000)
+            scrollAnimation(logContentDom, logContentDom.scrollTop, 0);
+        }).fail(function(err) {
+            console.log(err);
+        });
+    })
+    // .log_bottom click: requests 50 lines (order=desc, no base_line), shows them in #log_npu,
+    // probes once for lines past that page and scrolls the pane to its end.
+    $('.log_bottom').click(function(e){
+        let logContentDom = document.querySelector(`#log_npu`)
+        $.get(`/api/v1/repos/${userName}/${repoPath}/modelmanage/${jobID}/modelartlog?version_name=V0001&base_line=&lines=50&order=desc`, (data) => {
+            // Clear the old lines only after the new page arrived, so a failed request keeps them.
+            $(`#log_file_npu`).siblings('pre').remove()
+            // End of the page just loaded; the probe below is compared against it.
+            const end_line = String(data.EndLine)
+            $(`#log_npu input[name=end_line]`).val(data.EndLine)   // remember the range that is now displayed
+            $(`#log_npu input[name=start_line]`).val(data.StartLine)
+            $(`#log_npu`).append('<pre>' + data.Content)   // NOTE(review): '<pre>' assumed from .siblings('pre') — confirm
+            $.get(`/api/v1/repos/${userName}/${repoPath}/modelmanage/${jobID}/modelartlog?version_name=V0001&base_line=${data.EndLine}&lines=50&order=desc`, (more) => {
+                if (more.Lines == 0){
+                    $(`#log_npu_header`).text('您已翻阅至日志底部')
+                    $(`#log_npu_message`).css('display', 'block')
+                    setTimeout(function(){
+                        $(`#log_npu_message`).css('display', 'none')
+                    }, 1000)
+                }else if(end_line!==String(more.EndLine)){
+                    // Append only when the probe advanced past the page loaded above.
+                    $(`#log_npu input[name=end_line]`).val(more.EndLine)
+                    $(`#log_npu`).append('<pre>' + more.Content)
+                }
+            }).fail(function(err) {
+                console.log(err);
+            });
+            scrollAnimation(logContentDom, logContentDom.scrollTop+1, logContentDom.scrollHeight - logContentDom.clientHeight);
+        }).fail(function(err) {
+            console.log(err);
+        });
+    })
 
\ No newline at end of file