Browse Source

debug

fix-2419
lewis 3 years ago
parent
commit
2f7e4d34c6
2 changed files with 107 additions and 95 deletions
  1. +104
    -93
      routers/repo/modelarts.go
  2. +3
    -2
      routers/routes/routes.go

+ 104
- 93
routers/repo/modelarts.go View File

@@ -393,83 +393,123 @@ func NotebookDebug2(ctx *context.Context) {
ctx.Redirect(result.Url + "?token=" + result.Token)
}


func NotebookManage(ctx *context.Context) {
func NotebookRestart(ctx *context.Context) {
var ID = ctx.Params(":id")
var action = ctx.Params(":action")
var resultCode = "0"
var errorMsg = ""
var status = ""


task := ctx.Cloudbrain

for { for {
task, err := models.GetCloudbrainByID(ID)
if err != nil {
log.Error("get task(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"])
ctx.CheckWechatBind()
if ctx.Written() {
return
}
if task.Status != string(models.ModelArtsStopped) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsCreateFailed) {
log.Error("the job(%s) is not stopped", task.JobName, ctx.Data["MsgID"])
resultCode = "-1" resultCode = "-1"
errorMsg = "system error"
errorMsg = "the job is not stopped"
break break
} }


if action == models.ActionStop {
if task.Status != string(models.ModelArtsRunning) {
log.Error("the job(%s) is not running", task.JobName, ctx.Data["MsgID"])
count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID)
if err != nil {
log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"])
resultCode = "-1"
errorMsg = "system error"
break
} else {
if count >= 1 {
log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
resultCode = "-1" resultCode = "-1"
errorMsg = "the job is not running"
errorMsg = "you have already a running or waiting task, can not create more"
break break
} }
}


if !ctx.IsSigned || (ctx.User.ID != task.UserID && !ctx.IsUserSiteAdmin() && !ctx.IsUserRepoOwner()) {
log.Error("the user has no right ro stop the job", task.JobName, ctx.Data["MsgID"])
resultCode = "-1"
errorMsg = "you have no right to stop the job"
break
}
} else if action == models.ActionRestart {
ctx.CheckWechatBind()
if ctx.Written() {
return
}
if task.Status != string(models.ModelArtsStopped) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsCreateFailed) {
log.Error("the job(%s) is not stopped", task.JobName, ctx.Data["MsgID"])
resultCode = "-1"
errorMsg = "the job is not stopped"
break
param := models.NotebookAction{
Action: models.ActionStart,
}
createTime := timeutil.TimeStampNow()
//todo: mod the sequence
res, err := modelarts.ManageNotebook2(task.JobID, param)
if err != nil {
log.Error("ManageNotebook2(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"])
resultCode = "-1"
errorMsg = err.Error()
if strings.Contains(err.Error(), modelarts.NotebookNotFound) {
errorMsg = "the job's version is too old and can not be restarted"
} }
break
}


if !ctx.IsSigned || (ctx.User.ID != task.UserID && !ctx.IsUserSiteAdmin()) {
log.Error("the user has no right ro restart the job", task.JobName, ctx.Data["MsgID"])
resultCode = "-1"
errorMsg = "you have no right to restart the job"
break
}
status = res.Status
newTask := &models.Cloudbrain{
Status: status,
UserID: task.UserID,
RepoID: task.RepoID,
JobID: task.JobID,
JobName: task.JobName,
DisplayJobName: task.DisplayJobName,
JobType: task.JobType,
Type: task.Type,
Uuid: task.Uuid,
Image: task.Image,
ComputeResource: task.ComputeResource,
Description: task.Description,
CreatedUnix: createTime,
UpdatedUnix: createTime,
}


count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID)
if err != nil {
log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"])
resultCode = "-1"
errorMsg = "system error"
break
} else {
if count >= 1 {
log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
resultCode = "-1"
errorMsg = "you have already a running or waiting task, can not create more"
break
}
}
err = models.RestartCloudbrain(task, newTask)
if err != nil {
log.Error("RestartCloudbrain(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"])
resultCode = "-1"
errorMsg = "system error"
break
}
ID = strconv.FormatInt(newTask.ID, 10)
notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, ID, task.DisplayJobName, models.ActionCreateDebugNPUTask)


action = models.ActionStart
} else {
log.Error("the action(%s) is illegal", action, ctx.Data["MsgID"])
break
}

ctx.JSON(200, map[string]string{
"result_code": resultCode,
"error_msg": errorMsg,
"status": status,
"id": ID,
})
}

func NotebookStop(ctx *context.Context) {
var ID = ctx.Params(":id")
var resultCode = "0"
var errorMsg = ""
var status = ""

task := ctx.Cloudbrain

for {
if task.Status != string(models.ModelArtsRunning) {
log.Error("the job(%s) is not running", task.JobName, ctx.Data["MsgID"])
resultCode = "-1" resultCode = "-1"
errorMsg = "非法操作"
errorMsg = "the job is not running"
break
}

if !ctx.IsSigned || (ctx.User.ID != task.UserID && !ctx.IsUserSiteAdmin() && !ctx.IsUserRepoOwner()) {
log.Error("the user has no right ro stop the job", task.JobName, ctx.Data["MsgID"])
resultCode = "-1"
errorMsg = "you have no right to stop the job"
break break
} }


param := models.NotebookAction{ param := models.NotebookAction{
Action: action,
Action: models.ActionStop,
} }
createTime := timeutil.TimeStampNow()
//todo: mod the sequence

res, err := modelarts.ManageNotebook2(task.JobID, param)
if err != nil {
log.Error("ManageNotebook2(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"])
@@ -482,46 +522,17 @@ func NotebookManage(ctx *context.Context) {
} }


status = res.Status status = res.Status
if action == models.ActionStart {
newTask := &models.Cloudbrain{
Status: status,
UserID: task.UserID,
RepoID: task.RepoID,
JobID: task.JobID,
JobName: task.JobName,
DisplayJobName: task.DisplayJobName,
JobType: task.JobType,
Type: task.Type,
Uuid: task.Uuid,
Image: task.Image,
ComputeResource: task.ComputeResource,
Description: task.Description,
CreatedUnix: createTime,
UpdatedUnix: createTime,
}

err = models.RestartCloudbrain(task, newTask)
if err != nil {
log.Error("RestartCloudbrain(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"])
resultCode = "-1"
errorMsg = "system error"
break
}
ID = strconv.FormatInt(newTask.ID, 10)
notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, ID, task.DisplayJobName, models.ActionCreateDebugNPUTask)
} else {
task.Status = res.Status
if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) {
task.EndTime = timeutil.TimeStampNow()
}
task.ComputeAndSetDuration()
err = models.UpdateJob(task)
if err != nil {
log.Error("UpdateJob(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"])
resultCode = "-1"
errorMsg = "system error"
break
}
task.Status = res.Status
if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) {
task.EndTime = timeutil.TimeStampNow()
}
task.ComputeAndSetDuration()
err = models.UpdateJob(task)
if err != nil {
log.Error("UpdateJob(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"])
resultCode = "-1"
errorMsg = "system error"
break
} }


break break


+ 3
- 2
routers/routes/routes.go View File

@@ -500,7 +500,7 @@ func RegisterRoutes(m *macaron.Macaron) {
m.Get("/email2user", user.Email2User)
m.Get("/recover_account", user.ResetPasswd)
m.Post("/recover_account", user.ResetPasswdPost)
m.Post("/recover_account_by_phone",bindIgnErr(auth.ResetPassWordByPhoneForm{}), user.ResetPasswdByPhonePost)
m.Post("/recover_account_by_phone", bindIgnErr(auth.ResetPassWordByPhoneForm{}), user.ResetPasswdByPhonePost)
m.Get("/forgot_password", user.ForgotPasswd)
m.Post("/forgot_password", user.ForgotPasswdPost)
m.Post("/logout", user.SignOut)
@@ -1165,7 +1165,8 @@ func RegisterRoutes(m *macaron.Macaron) {
m.Group("/:id", func() {
m.Get("", reqRepoCloudBrainReader, repo.NotebookShow)
m.Get("/debug", cloudbrain.AdminOrJobCreaterRight, repo.NotebookDebug2)
m.Post("/:action", reqRepoCloudBrainWriter, repo.NotebookManage)
m.Post("/restart", cloudbrain.AdminOrJobCreaterRight, repo.NotebookRestart)
m.Post("/stop", cloudbrain.AdminOrJobCreaterRight, repo.NotebookStop)
m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookDel)
})
m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, repo.NotebookNew)


Loading…
Cancel
Save