diff --git a/models/ai_model_manage.go b/models/ai_model_manage.go
index 115b5b485..533c12987 100644
--- a/models/ai_model_manage.go
+++ b/models/ai_model_manage.go
@@ -41,35 +41,36 @@ type AiModelManage struct {
 }
 
 type AiModelConvert struct {
-	ID                string `xorm:"pk"`
-	Name              string `xorm:"INDEX NOT NULL"`
-	Status            string `xorm:"NULL"`
-	SrcEngine         int    `xorm:"NOT NULL DEFAULT 0"`
-	RepoId            int64  `xorm:"INDEX NULL"`
-	ModelId           string `xorm:"NOT NULL"`
-	ModelName         string `xorm:"NULL"`
-	ModelVersion      string `xorm:"NOT NULL"`
-	ModelPath         string `xorm:"NULL"`
-	DestFormat        int    `xorm:"NOT NULL DEFAULT 0"`
-	NetOutputFormat   int    `xorm:"NULL"`
-	UserId            int64  `xorm:"NOT NULL"`
-	CloudBrainTaskId  string `xorm:"NULL"`
-	ContainerID       string
-	ContainerIp       string
-	RunTime           int64 `xorm:"NULL"`
-	TrainJobDuration  string
-	InputShape        string             `xorm:"varchar(2000)"`
-	InputDataFormat   string             `xorm:"NOT NULL"`
-	Description       string             `xorm:"varchar(2000)"`
-	Path              string             `xorm:"varchar(400) NOT NULL"`
-	CreatedUnix       timeutil.TimeStamp `xorm:"created"`
-	UpdatedUnix       timeutil.TimeStamp `xorm:"updated"`
-	StartTime         timeutil.TimeStamp
-	EndTime           timeutil.TimeStamp
-	UserName          string
-	UserRelAvatarLink string
-	IsCanOper         bool
-	IsCanDelete       bool
+	ID                 string `xorm:"pk"`
+	Name               string `xorm:"INDEX NOT NULL"`
+	Status             string `xorm:"NULL"`
+	SrcEngine          int    `xorm:"NOT NULL DEFAULT 0"`
+	RepoId             int64  `xorm:"INDEX NULL"`
+	ModelId            string `xorm:"NOT NULL"`
+	ModelName          string `xorm:"NULL"`
+	ModelVersion       string `xorm:"NOT NULL"`
+	ModelPath          string `xorm:"NULL"`
+	DestFormat         int    `xorm:"NOT NULL DEFAULT 0"`
+	NetOutputFormat    int    `xorm:"NULL"`
+	UserId             int64  `xorm:"NOT NULL"`
+	CloudBrainTaskId   string `xorm:"NULL"`
+	ModelArtsVersionId string `xorm:"NULL"`
+	ContainerID        string
+	ContainerIp        string
+	RunTime            int64 `xorm:"NULL"`
+	TrainJobDuration   string
+	InputShape         string             `xorm:"varchar(2000)"`
+	InputDataFormat    string             `xorm:"NOT NULL"`
+	Description        string             `xorm:"varchar(2000)"`
+	Path               string             `xorm:"varchar(400) NOT NULL"`
+	CreatedUnix        timeutil.TimeStamp `xorm:"created"`
+	UpdatedUnix        timeutil.TimeStamp `xorm:"updated"`
+	StartTime          timeutil.TimeStamp
+	EndTime            timeutil.TimeStamp
+	UserName           string
+	UserRelAvatarLink  string
+	IsCanOper          bool
+	IsCanDelete        bool
 }
 
 type AiModelQueryOptions struct {
@@ -109,6 +110,23 @@ func ModelComputeAndSetDuration(task *AiModelConvert, result JobResultPayload) {
 	task.TrainJobDuration = ConvertDurationToStr(d)
 }
 
+// UpdateModelConvertModelArts stores CloudBrainTaskId (column cloud_brain_task_id)
+// and VersionId (column model_arts_version_id) on the model-convert record id.
+func UpdateModelConvertModelArts(id string, CloudBrainTaskId string, VersionId string) error {
+	var sess *xorm.Session
+	sess = x.ID(id)
+	defer sess.Close()
+	re, err := sess.Cols("cloud_brain_task_id,model_arts_version_id").Update(&AiModelConvert{
+		CloudBrainTaskId:   CloudBrainTaskId,
+		ModelArtsVersionId: VersionId,
+	})
+	if err != nil {
+		return err
+	}
+	log.Info("success to update cloud_brain_task_id from db.re=" + fmt.Sprint((re)))
+	return nil
+}
+
 func UpdateModelConvertCBTI(id string, CloudBrainTaskId string) error {
 	var sess *xorm.Session
 	sess = x.ID(id)
diff --git a/routers/api/v1/api.go b/routers/api/v1/api.go
index 601b79f18..3df239e1a 100755
--- a/routers/api/v1/api.go
+++ b/routers/api/v1/api.go
@@ -908,6 +908,7 @@ func RegisterRoutes(m *macaron.Macaron) {
 		m.Group("/modelmanage", func() {
 			m.Get("/:id", repo.GetCloudbrainModelConvertTask)
 			m.Get("/:id/log", repo.CloudbrainForModelConvertGetLog)
+			m.Get("/:id/modelartlog", repo.TrainJobForModelConvertGetLog)
 			m.Get("/:id/model_list", repo.CloudBrainModelConvertList)
 		}, reqRepoReader(models.UnitTypeModelManage))
 		m.Group("/modelarts", func() {
diff --git a/routers/api/v1/repo/modelarts.go b/routers/api/v1/repo/modelarts.go
index 9e4edea03..82fe4e112 100755
--- a/routers/api/v1/repo/modelarts.go
+++ b/routers/api/v1/repo/modelarts.go
@@ -199,6 +199,92 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) {
 
 }
 
+// TrainJobForModelConvertGetLog serves a window of the ModelArts training log of
+// the model-convert task named by the ":id" route parameter. It reads the
+// base_line, order and lines query parameters and answers with JSON.
+func TrainJobForModelConvertGetLog(ctx *context.APIContext) {
+	var (
+		err error
+	)
+
+	var jobID = ctx.Params(":id")
+	var baseLine = ctx.Query("base_line")
+	var order = ctx.Query("order")
+	var lines = ctx.Query("lines")
+	lines_int, err := strconv.Atoi(lines)
+	if err != nil {
+		// The request still proceeds with lines_int == 0; log the raw value,
+		// since the parsed one is always zero on failure.
+		log.Error("change lines(%s) string to int failed:%v", lines, err)
+	}
+
+	if order != modelarts.OrderDesc && order != modelarts.OrderAsc {
+		log.Error("order(%s) check failed", order)
+		ctx.JSON(http.StatusBadRequest, map[string]interface{}{
+			"err_msg": "order check failed",
+		})
+		return
+	}
+
+	resultLogFile, result, err := trainJobForModelConvertGetLogContent(jobID, baseLine, order, lines_int)
+	if err != nil {
+		log.Error("trainJobGetLog(%s) failed:%v", jobID, err.Error())
+		// Always answer, so the caller sees a failure instead of an empty response.
+		ctx.JSON(http.StatusInternalServerError, map[string]interface{}{
+			"err_msg": "get log failed",
+		})
+		return
+	}
+	if result == nil {
+		// No log window was returned, e.g. because no log file is listed yet.
+		ctx.JSON(http.StatusNotFound, map[string]interface{}{
+			"err_msg": "log file not found",
+		})
+		return
+	}
+
+	ctx.Data["log_file_name"] = resultLogFile.LogFileList[0]
+
+	ctx.JSON(http.StatusOK, map[string]interface{}{
+		"JobID":       jobID,
+		"LogFileName": resultLogFile.LogFileList[0],
+		"StartLine":   result.StartLine,
+		"EndLine":     result.EndLine,
+		"Content":     result.Content,
+		"Lines":       result.Lines,
+	})
+}
+
+// trainJobForModelConvertGetLogContent loads the model-convert task jobID, lists
+// its ModelArts log files and fetches the requested window of the first one.
+// When no log file is listed it returns a nil log result together with a nil
+// error, so callers must check the result before using it.
+func trainJobForModelConvertGetLogContent(jobID string, baseLine string, order string, lines int) (*models.GetTrainJobLogFileNamesResult, *models.GetTrainJobLogResult, error) {
+	task, err := models.QueryModelConvertById(jobID)
+	if err != nil {
+		log.Error("QueryModelConvertById(%s) failed:%v", jobID, err.Error())
+		return nil, nil, err
+	}
+	resultLogFile, err := modelarts.GetTrainJobLogFileNames(task.CloudBrainTaskId, task.ModelArtsVersionId)
+	if err != nil {
+		log.Error("GetTrainJobLogFileNames(%s) failed:%v", task.CloudBrainTaskId, err.Error())
+		return nil, nil, err
+	}
+	if resultLogFile == nil || len(resultLogFile.LogFileList) == 0 {
+		// Indexing LogFileList[0] below would panic on an empty list.
+		log.Error("GetTrainJobLogFileNames(%s) returned no log file", task.CloudBrainTaskId)
+		return resultLogFile, nil, nil
+	}
+
+	result, err := modelarts.GetTrainJobLog(task.CloudBrainTaskId, task.ModelArtsVersionId, baseLine, resultLogFile.LogFileList[0], order, lines)
+	if err != nil {
+		log.Error("GetTrainJobLog(%s) failed:%v", task.CloudBrainTaskId, err.Error())
+		return nil, nil, err
+	}
+
+	return resultLogFile, result, nil
+}
+
 func TrainJobGetLog(ctx *context.APIContext) {
 	var (
 		err error
diff --git a/routers/repo/ai_model_convert.go b/routers/repo/ai_model_convert.go
index 904bea36d..60eff8c79 100644
--- a/routers/repo/ai_model_convert.go
+++ b/routers/repo/ai_model_convert.go
@@ -195,8 +195,15 @@ func createNpuTrainJob(modelConvert *models.AiModelConvert, ctx *context.Context
 		BranchName: DefaultBranchName,
 	}
 	result, err := modelarts.GenerateModelConvertTrainJob(req)
-	log.Info("jobId=" + fmt.Sprint(result.JobID))
-	models.UpdateModelConvertCBTI(modelConvert.ID, fmt.Sprint(result.JobID))
+	if err != nil {
+		// result must not be dereferenced when the job could not be created.
+		log.Error("GenerateModelConvertTrainJob(%s) failed:%v", modelConvert.ID, err)
+		return
+	}
+	log.Info("jobId=" + fmt.Sprint(result.JobID) + " versionid=" + fmt.Sprint(result.VersionID))
+	if err := models.UpdateModelConvertModelArts(modelConvert.ID, fmt.Sprint(result.JobID), fmt.Sprint(result.VersionID)); err != nil {
+		log.Error("UpdateModelConvertModelArts(%s) failed:%v", modelConvert.ID, err)
+	}
 }
 
 func downloadConvertCode(repopath string, codePath, branchName string) error {
@@ -368,6 +375,12 @@ func DeleteModelConvert(ctx *context.Context) {
 	}
 }
 
+// isCloudBrainTask reports whether task is handled through the cloudbrain job
+// API, which is the case when its source engine is PYTORCH_ENGINE.
+func isCloudBrainTask(task *models.AiModelConvert) bool {
+	return task.SrcEngine == PYTORCH_ENGINE
+}
+
 func StopModelConvert(ctx *context.Context) {
 	id := ctx.Params(":id")
 	log.Info("stop model convert start.id=" + id)
@@ -384,35 +397,47 @@ func ShowModelConvertInfo(ctx *context.Context) {
 	if err == nil {
 		ctx.Data["task"] = job
 	}
-	result, err := cloudbrain.GetJob(job.CloudBrainTaskId)
-	if err != nil {
-		log.Info("error:" + err.Error())
-		ctx.Data["error"] = err.Error()
-		return
-	}
-	if result != nil {
-		jobRes, _ := models.ConvertToJobResultPayload(result.Payload)
-		ctx.Data["result"] = jobRes
-		taskRoles := jobRes.TaskRoles
-		taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{}))
-		ctx.Data["taskRes"] = taskRes
-		ctx.Data["ExitDiagnostics"] = taskRes.TaskStatuses[0].ExitDiagnostics
-
-		job.Status = jobRes.JobStatus.State
-
-		if jobRes.JobStatus.State != string(models.JobWaiting) && jobRes.JobStatus.State != string(models.JobFailed) {
-			job.ContainerIp = taskRes.TaskStatuses[0].ContainerIP
-			job.ContainerID = taskRes.TaskStatuses[0].ContainerID
-			job.Status = taskRes.TaskStatuses[0].State
+	if isCloudBrainTask(job) {
+		ctx.Data["npu_display"] = "none"
+		ctx.Data["gpu_display"] = "block"
+		result, err := cloudbrain.GetJob(job.CloudBrainTaskId)
+		if err != nil {
+			log.Info("error:" + err.Error())
+			ctx.Data["error"] = err.Error()
+			return
 		}
-		if jobRes.JobStatus.State != string(models.JobWaiting) {
-			models.ModelComputeAndSetDuration(job, jobRes)
-			err = models.UpdateModelConvert(job)
-			if err != nil {
-				log.Error("UpdateModelConvert failed:", err)
+		if result != nil {
+			jobRes, _ := models.ConvertToJobResultPayload(result.Payload)
+			ctx.Data["result"] = jobRes
+			taskRoles := jobRes.TaskRoles
+			taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{}))
+			ctx.Data["taskRes"] = taskRes
+			ctx.Data["ExitDiagnostics"] = taskRes.TaskStatuses[0].ExitDiagnostics
+			ctx.Data["AppExitDiagnostics"] = jobRes.JobStatus.AppExitDiagnostics
+
+			job.Status = jobRes.JobStatus.State
+
+			if jobRes.JobStatus.State != string(models.JobWaiting) && jobRes.JobStatus.State != string(models.JobFailed) {
+				job.ContainerIp = taskRes.TaskStatuses[0].ContainerIP
+				job.ContainerID = taskRes.TaskStatuses[0].ContainerID
+				job.Status = taskRes.TaskStatuses[0].State
+			}
+			if jobRes.JobStatus.State != string(models.JobWaiting) {
+				models.ModelComputeAndSetDuration(job, jobRes)
+				err = models.UpdateModelConvert(job)
+				if err != nil {
+					log.Error("UpdateModelConvert failed:", err)
+				}
 			}
 		}
+	} else {
+		ctx.Data["npu_display"] = "block"
+		ctx.Data["gpu_display"] = "none"
+		ctx.Data["ExitDiagnostics"] = ""
+		ctx.Data["AppExitDiagnostics"] = ""
+
 	}
+
 	ctx.HTML(200, tplModelConvertInfo)
 }
 
diff --git a/templates/repo/modelmanage/convertshowinfo.tmpl b/templates/repo/modelmanage/convertshowinfo.tmpl
index ba790e354..a1f1f0c85 100644
--- a/templates/repo/modelmanage/convertshowinfo.tmpl
+++ b/templates/repo/modelmanage/convertshowinfo.tmpl
@@ -215,8 +215,9 @@ td, th {
' + data.Content)
+ }
+
+ }
+ }).fail(function(err) {
+ console.log(err);
+ });
+ }
+ if([0,1,2,3,4,5,6,7,8,9,10].includes(scrollTop) && scrollLeft==0){
+ let start_line = $(`#log_npu input[name=start_line]`).val()
+ $.get(`/api/v1/repos/${userName}/${repoPath}/modelmanage/${jobID}/modelartlog?version_name=V0001&base_line=${start_line}&lines=50&order=asc`, (data) => {
+ if (data.Lines == 0){
+ $(`#log_npu_header`).text('您已翻阅至日志顶部')
+ $(`#log_npu_message`).css('display', 'block')
+ setTimeout(function(){
+ $(`#log_npu_message`).css('display', 'none')
+ }, 1000)
+ }else{
+ $(`#log_npu input[name=start_line]`).val(data.StartLine) //如果变动就改变所对应的值
+ $(`#log_npu`).prepend('' + data.Content)
+ }
+ }).fail(function(err) {
+ console.log(err);
+ });
+ }
+ }
+ function scrollAnimation(dom, currentY, targetY, currentX) {
+ let needScrollTop = targetY - currentY;
+ let _currentY = currentY;
+ setTimeout(() => {
+ // 一次调用滑动帧数,每次调用会不一样
+ //取总距离的十分之一
+ const dist = Math.ceil(needScrollTop / 10);
+ _currentY += dist;
+ //移动一个十分之一
+ dom.scrollTo(currentX || 0, _currentY,'smooth');
+ // 如果移动幅度小于十个像素,直接移动,否则递归调用,实现动画效果
+ if (needScrollTop > 10 || needScrollTop < -10) {
+ scrollAnimation(dom, _currentY, targetY)
+ } else {
+ dom.scrollTo(0, targetY,'smooth')
+ }
+ }, 1)
+ }
+
+ $('.log_top').click(function(){
+
+ let logContentDom = document.querySelector(`#log_npu`)
+
+ $(`#log_file_npu`).siblings('pre').remove()
+ $.get(`/api/v1/repos/${userName}/${repoPath}/modelmanage/${jobID}/modelartlog?version_name=V0001&base_line=&lines=50&order=asc`, (data) => {
+
+ $(`#log_npu input[name=end_line]`).val(data.EndLine) //如果变动就改变所对应的值
+ $(`#log_npu input[name=start_line]`).val(data.StartLine)
+ $(`#log_npu`).prepend('' + data.Content)
+ $(`#log_npu_header`).text('您已翻阅至日志顶部')
+ $(`#log_npu_message`).css('display', 'block')
+ setTimeout(function(){
+ $(`#log_npu_message`).css('display', 'none')
+ }, 1000)
+ scrollAnimation(logContentDom, logContentDom.scrollTop, 0);
+ })
+
+ })
+ $('.log_bottom').click(function(e){
+
+ let logContentDom = document.querySelector(`#log_npu`)
+ $(`#log_file_npu`).siblings('pre').remove()
+ $.get(`/api/v1/repos/${userName}/${repoPath}/modelmanage/${jobID}/modelartlog?version_name=V0001&base_line=&lines=50&order=desc`, (data) => {
+
+ $(`#log_npu input[name=end_line]`).val(data.EndLine) //如果变动就改变所对应的值
+ $(`#log_npu input[name=start_line]`).val(data.StartLine)
+ $(`#log_npu`).append('' + data.Content)
+ $.get(`/api/v1/repos/${userName}/${repoPath}/modelmanage/${jobID}/modelartlog?version_name=V0001&base_line=${data.EndLine}&lines=50&order=desc`, (data) => {
+ if (data.Lines == 0){
+ $(`#log_npu_header`).text('您已翻阅至日志底部')
+ $(`#log_npu_message`).css('display', 'block')
+ setTimeout(function(){
+ $(`#log_npu_message`).css('display', 'none')
+ }, 1000)
+ }else{
+ if(end_line===data.EndLine){
+ return
+ }
+ else{
+ $(`#log_npu input[name=end_line]`).val(data.EndLine)
+ $(`#log_npu`).append('' + data.Content)
+ }
+
+ }
+ }).fail(function(err) {
+ console.log(err);
+ });
+ scrollAnimation(logContentDom, logContentDom.scrollTop+1, logContentDom.scrollHeight - logContentDom.clientHeight);
+ })
+ })
\ No newline at end of file