From 9bb9826a907d51a9c2b74b5d803bcb0a4e3cf144 Mon Sep 17 00:00:00 2001 From: lewis <747342561@qq.com> Date: Thu, 16 Dec 2021 18:03:25 +0800 Subject: [PATCH] backup --- modules/modelarts/resty.go | 6 ++--- routers/repo/cloudbrain.go | 50 +++++++++++++++++++++++++++++++++++++- routers/repo/modelarts.go | 44 ++++++++++++++++++++++++++------- routers/routes/routes.go | 3 ++- 4 files changed, 89 insertions(+), 14 deletions(-) diff --git a/modules/modelarts/resty.go b/modules/modelarts/resty.go index 2cc9e34be..07f26ceb7 100755 --- a/modules/modelarts/resty.go +++ b/modules/modelarts/resty.go @@ -174,7 +174,7 @@ sendjob: return &result, nil } -func StopJob(jobID string, param models.NotebookAction) (*models.NotebookActionResult, error) { +func ManageNotebook(jobID string, param models.NotebookAction) (*models.NotebookActionResult, error) { checkSetting() client := getRestyClient() var result models.NotebookActionResult @@ -207,8 +207,8 @@ sendjob: } if len(response.ErrorCode) != 0 { - log.Error("StopJob failed(%s): %s", response.ErrorCode, response.ErrorMsg) - return &result, fmt.Errorf("StopJob failed(%s): %s", response.ErrorCode, response.ErrorMsg) + log.Error("ManageNotebook failed(%s): %s", response.ErrorCode, response.ErrorMsg) + return &result, fmt.Errorf("ManageNotebook failed(%s): %s", response.ErrorCode, response.ErrorMsg) } return &result, nil diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index 3f5fce013..7de4eafe7 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -247,6 +247,54 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob") } +func CloudBrainRestart(ctx *context.Context) { + ctx.Data["PageIsCloudBrain"] = true + + /* + 1. look up the job and check its status + 2. start a new debug environment (under the same name) from the configuration that was looked up + 3. update the status of this task + */ + + var jobID = ctx.Params(":jobid") + task, err := models.GetCloudbrainByJobID(jobID) + if err != nil { 
log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error(), ctx.Data["MsgID"]) + ctx.RenderWithErr(err.Error(), tplCloudBrainIndex, nil) + return + } + + if task.Status != string(models.JobStopped) && task.Status != string(models.JobSucceeded) && task.Status != string(models.JobFailed) { + log.Error("the job(%s) is not stopped", task.JobName, ctx.Data["MsgID"]) + ctx.RenderWithErr("the job is not stopped", tplCloudBrainIndex, nil) + return + } + + count, err := models.GetCloudbrainCountByUserID(ctx.User.ID) + if err != nil { + log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"]) + ctx.RenderWithErr("system error", tplCloudBrainIndex, nil) + return + } else { + if count >= 1 { + log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) + ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplCloudBrainIndex, nil) + return + } + } + + jobName := task.JobName + codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath + err = cloudbrain.GenerateTask(ctx, jobName, image, cloudbrain.Command, task.Uuid, codePath, getMinioPath(jobName, cloudbrain.ModelMountPath+"/"), + getMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), getMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), + getMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), jobType, gpuQueue, resourceSpecId) + if err != nil { + ctx.RenderWithErr(err.Error(), tplCloudBrainIndex, nil) + return + } + ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob") +} + func CloudBrainShow(ctx *context.Context) { ctx.Data["PageIsCloudBrain"] = true @@ -423,7 +471,7 @@ func StopJobs(cloudBrains []*models.Cloudbrain) { Action: models.ActionStop, } err := retry(3, time.Second*30, func() error { - _, err := modelarts.StopJob(taskInfo.JobID, param) + _, err := modelarts.ManageNotebook(taskInfo.JobID, param) return err }) logErrorAndUpdateJobStatus(err, taskInfo) diff --git a/routers/repo/modelarts.go 
b/routers/repo/modelarts.go index d2a094bc8..a1e47e11f 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -232,28 +232,54 @@ func NotebookDebug(ctx *context.Context) { ctx.Redirect(debugUrl) } -func NotebookStop(ctx *context.Context) { +func NotebookManage(ctx *context.Context) { var jobID = ctx.Params(":jobid") - log.Info(jobID) + var action = ctx.Params(":action") + task, err := models.GetCloudbrainByJobID(jobID) if err != nil { ctx.ServerError("GetCloudbrainByJobID failed", err) return } - if task.Status != string(models.JobRunning) { - log.Error("the job(%s) is not running", task.JobName) - ctx.ServerError("the job is not running", errors.New("the job is not running")) + if action == models.ActionStop { + if task.Status != string(models.ModelArtsRunning) { + log.Error("the job(%s) is not running", task.JobName) + ctx.ServerError("the job is not running", errors.New("the job is not running")) + return + } + } else if action == models.ActionRestart { + if task.Status != string(models.ModelArtsStopped) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsCreateFailed) { + log.Error("the job(%s) is not stopped", task.JobName) + ctx.ServerError("the job is not stopped", errors.New("the job is not stopped")) + return + } + + count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID) + if err != nil { + log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"]) + ctx.RenderWithErr("system error", tplDebugJobIndex, nil) + return + } else { + if count >= 1 { + log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) + ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplDebugJobIndex, nil) + return + } + } + } else { + log.Error("the action(%s) is illegal", action) + ctx.ServerError("the action is illegal", errors.New("the action is illegal")) return } param := models.NotebookAction{ - Action: models.ActionStop, + 
Action: action, } - res, err := modelarts.StopJob(jobID, param) + res, err := modelarts.ManageNotebook(jobID, param) if err != nil { - log.Error("StopJob(%s) failed:%v", task.JobName, err.Error()) - ctx.ServerError("StopJob failed", err) + log.Error("ManageNotebook(%s) failed:%v", task.JobName, err.Error()) + ctx.ServerError("ManageNotebook failed", err) return } diff --git a/routers/routes/routes.go b/routers/routes/routes.go index cfd962603..46c864f21 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -968,6 +968,7 @@ func RegisterRoutes(m *macaron.Macaron) { m.Post("/commit_image", cloudbrain.AdminOrOwnerOrJobCreaterRight, bindIgnErr(auth.CommitImageCloudBrainForm{}), repo.CloudBrainCommitImage) m.Post("/stop", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.CloudBrainStop) m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.CloudBrainDel) + m.Post("/restart", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.CloudBrainRestart) m.Get("/rate", reqRepoCloudBrainReader, repo.GetRate) m.Get("/models", reqRepoCloudBrainReader, repo.CloudBrainShowModels) m.Get("/download_model", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.CloudBrainDownloadModel) @@ -1002,7 +1003,7 @@ func RegisterRoutes(m *macaron.Macaron) { m.Group("/:jobid", func() { m.Get("", reqRepoCloudBrainReader, repo.NotebookShow) m.Get("/debug", reqRepoCloudBrainWriter, repo.NotebookDebug) - m.Post("/stop", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookStop) + m.Post("/:action", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookManage) m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookDel) }) m.Get("/create", reqRepoCloudBrainWriter, repo.NotebookNew)