From 2f7e4d34c65f8eeb22255c9ec431736cdebc2c22 Mon Sep 17 00:00:00 2001 From: lewis <747342561@qq.com> Date: Wed, 13 Jul 2022 18:23:30 +0800 Subject: [PATCH] debug --- routers/repo/modelarts.go | 197 ++++++++++++++++++++------------------ routers/routes/routes.go | 5 +- 2 files changed, 107 insertions(+), 95 deletions(-) diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index be2928aac..11c3557b2 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -393,83 +393,123 @@ func NotebookDebug2(ctx *context.Context) { ctx.Redirect(result.Url + "?token=" + result.Token) } -func NotebookManage(ctx *context.Context) { +func NotebookRestart(ctx *context.Context) { var ID = ctx.Params(":id") - var action = ctx.Params(":action") var resultCode = "0" var errorMsg = "" var status = "" + task := ctx.Cloudbrain + for { - task, err := models.GetCloudbrainByID(ID) - if err != nil { - log.Error("get task(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"]) + ctx.CheckWechatBind() + if ctx.Written() { + return + } + if task.Status != string(models.ModelArtsStopped) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsCreateFailed) { + log.Error("the job(%s) is not stopped", task.JobName, ctx.Data["MsgID"]) resultCode = "-1" - errorMsg = "system error" + errorMsg = "the job is not stopped" break } - if action == models.ActionStop { - if task.Status != string(models.ModelArtsRunning) { - log.Error("the job(%s) is not running", task.JobName, ctx.Data["MsgID"]) + count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID) + if err != nil { + log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"]) + resultCode = "-1" + errorMsg = "system error" + break + } else { + if count >= 1 { + log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) resultCode = "-1" - errorMsg = "the job is not running" + errorMsg = "you have already a running or waiting task, can not create more" break } + } - if !ctx.IsSigned || (ctx.User.ID != task.UserID && !ctx.IsUserSiteAdmin() && !ctx.IsUserRepoOwner()) { - log.Error("the user has no right ro stop the job", task.JobName, ctx.Data["MsgID"]) - resultCode = "-1" - errorMsg = "you have no right to stop the job" - break - } - } else if action == models.ActionRestart { - ctx.CheckWechatBind() - if ctx.Written() { - return - } - if task.Status != string(models.ModelArtsStopped) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsCreateFailed) { - log.Error("the job(%s) is not stopped", task.JobName, ctx.Data["MsgID"]) - resultCode = "-1" - errorMsg = "the job is not stopped" - break + param := models.NotebookAction{ + Action: models.ActionStart, + } + createTime := timeutil.TimeStampNow() + //todo: mod the sequence + res, err := modelarts.ManageNotebook2(task.JobID, param) + if err != nil { + log.Error("ManageNotebook2(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"]) + resultCode = "-1" + errorMsg = err.Error() + if strings.Contains(err.Error(), modelarts.NotebookNotFound) { + errorMsg = "the job's version is too old and can not be restarted" } + break + } - if !ctx.IsSigned || (ctx.User.ID != task.UserID && !ctx.IsUserSiteAdmin()) { - log.Error("the user has no right ro restart the job", task.JobName, ctx.Data["MsgID"]) - resultCode = "-1" - errorMsg = "you have no right to restart the job" - break - } + status = res.Status + newTask := &models.Cloudbrain{ + Status: status, + UserID: task.UserID, + RepoID: task.RepoID, + JobID: task.JobID, + JobName: task.JobName, + DisplayJobName: task.DisplayJobName, + JobType: task.JobType, + Type: task.Type, + Uuid: task.Uuid, + Image: task.Image, + ComputeResource: task.ComputeResource, + Description: task.Description, + CreatedUnix: createTime, + UpdatedUnix: createTime, + } - count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID) - if err != nil { - log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"]) - resultCode = "-1" - errorMsg = "system error" - break - } else { - if count >= 1 { - log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) - resultCode = "-1" - errorMsg = "you have already a running or waiting task, can not create more" - break - } - } + err = models.RestartCloudbrain(task, newTask) + if err != nil { + log.Error("RestartCloudbrain(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"]) + resultCode = "-1" + errorMsg = "system error" + break + } + ID = strconv.FormatInt(newTask.ID, 10) + notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, ID, task.DisplayJobName, models.ActionCreateDebugNPUTask) - action = models.ActionStart - } else { - log.Error("the action(%s) is illegal", action, ctx.Data["MsgID"]) + break + } + + ctx.JSON(200, map[string]string{ + "result_code": resultCode, + "error_msg": errorMsg, + "status": status, + "id": ID, + }) +} + +func NotebookStop(ctx *context.Context) { + var ID = ctx.Params(":id") + var resultCode = "0" + var errorMsg = "" + var status = "" + + task := ctx.Cloudbrain + + for { + if task.Status != string(models.ModelArtsRunning) { + log.Error("the job(%s) is not running", task.JobName, ctx.Data["MsgID"]) resultCode = "-1" - errorMsg = "非法操作" + errorMsg = "the job is not running" + break + } + + if !ctx.IsSigned || (ctx.User.ID != task.UserID && !ctx.IsUserSiteAdmin() && !ctx.IsUserRepoOwner()) { + log.Error("the user has no right ro stop the job", task.JobName, ctx.Data["MsgID"]) + resultCode = "-1" + errorMsg = "you have no right to stop the job" break } param := models.NotebookAction{ - Action: action, + Action: models.ActionStop, } - createTime := timeutil.TimeStampNow() - //todo: mod the sequence + res, err := modelarts.ManageNotebook2(task.JobID, param) if err != nil { log.Error("ManageNotebook2(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"]) @@ -482,46 +522,17 @@ func NotebookManage(ctx *context.Context) { } status = res.Status - if action == models.ActionStart { - newTask := &models.Cloudbrain{ - Status: status, - UserID: task.UserID, - RepoID: task.RepoID, - JobID: task.JobID, - JobName: task.JobName, - DisplayJobName: task.DisplayJobName, - JobType: task.JobType, - Type: task.Type, - Uuid: task.Uuid, - Image: task.Image, - ComputeResource: task.ComputeResource, - Description: task.Description, - CreatedUnix: createTime, - UpdatedUnix: createTime, - } - - err = models.RestartCloudbrain(task, newTask) - if err != nil { - log.Error("RestartCloudbrain(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"]) - resultCode = "-1" - errorMsg = "system error" - break - } - ID = strconv.FormatInt(newTask.ID, 10) - notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, ID, task.DisplayJobName, models.ActionCreateDebugNPUTask) - } else { - task.Status = res.Status - if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) { - task.EndTime = timeutil.TimeStampNow() - } - task.ComputeAndSetDuration() - err = models.UpdateJob(task) - if err != nil { - log.Error("UpdateJob(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"]) - resultCode = "-1" - errorMsg = "system error" - break - } + task.Status = res.Status + if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) { + task.EndTime = timeutil.TimeStampNow() + } + task.ComputeAndSetDuration() + err = models.UpdateJob(task) + if err != nil { + log.Error("UpdateJob(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"]) + resultCode = "-1" + errorMsg = "system error" + break } break diff --git a/routers/routes/routes.go b/routers/routes/routes.go index 8f5fb4d5a..281edd385 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -500,7 +500,7 @@ func RegisterRoutes(m *macaron.Macaron) { m.Get("/email2user", user.Email2User) m.Get("/recover_account", user.ResetPasswd) m.Post("/recover_account", user.ResetPasswdPost) - m.Post("/recover_account_by_phone",bindIgnErr(auth.ResetPassWordByPhoneForm{}), user.ResetPasswdByPhonePost) + m.Post("/recover_account_by_phone", bindIgnErr(auth.ResetPassWordByPhoneForm{}), user.ResetPasswdByPhonePost) m.Get("/forgot_password", user.ForgotPasswd) m.Post("/forgot_password", user.ForgotPasswdPost) m.Post("/logout", user.SignOut) @@ -1165,7 +1165,8 @@ func RegisterRoutes(m *macaron.Macaron) { m.Group("/:id", func() { m.Get("", reqRepoCloudBrainReader, repo.NotebookShow) m.Get("/debug", cloudbrain.AdminOrJobCreaterRight, repo.NotebookDebug2) - m.Post("/:action", reqRepoCloudBrainWriter, repo.NotebookManage) + m.Post("/restart", cloudbrain.AdminOrJobCreaterRight, repo.NotebookRestart) + m.Post("/stop", cloudbrain.AdminOrJobCreaterRight, repo.NotebookStop) m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookDel) }) m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, repo.NotebookNew)