Browse Source

backup

tags/v1.21.12.2^2
lewis 3 years ago
parent
commit
9bb9826a90
4 changed files with 89 additions and 14 deletions
  1. +3
    -3
      modules/modelarts/resty.go
  2. +49
    -1
      routers/repo/cloudbrain.go
  3. +35
    -9
      routers/repo/modelarts.go
  4. +2
    -1
      routers/routes/routes.go

+ 3
- 3
modules/modelarts/resty.go View File

@@ -174,7 +174,7 @@ sendjob:
return &result, nil
}

func StopJob(jobID string, param models.NotebookAction) (*models.NotebookActionResult, error) {
func ManageNotebook(jobID string, param models.NotebookAction) (*models.NotebookActionResult, error) {
checkSetting()
client := getRestyClient()
var result models.NotebookActionResult
@@ -207,8 +207,8 @@ sendjob:
}

if len(response.ErrorCode) != 0 {
log.Error("StopJob failed(%s): %s", response.ErrorCode, response.ErrorMsg)
return &result, fmt.Errorf("StopJob failed(%s): %s", response.ErrorCode, response.ErrorMsg)
log.Error("ManageNotebook failed(%s): %s", response.ErrorCode, response.ErrorMsg)
return &result, fmt.Errorf("ManageNotebook failed(%s): %s", response.ErrorCode, response.ErrorMsg)
}

return &result, nil


+ 49
- 1
routers/repo/cloudbrain.go View File

@@ -247,6 +247,54 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob")
}

// CloudBrainRestart handles a restart request for the cloudbrain job named by
// the ":jobid" route parameter.
//
// Intended flow (translated from the original author's notes):
//  1. look up the job and check its status;
//  2. start a new debug environment from the looked-up configuration,
//     reusing the same job name;
//  3. update the status of this task.
//
// Every failure renders the cloudbrain index template with an error message;
// on success the client is redirected to the repository's "/debugjob" page.
//
// NOTE(review): image, jobType, gpuQueue and resourceSpecId are not declared
// inside this function; presumably they should come from the stored task or
// from package-level values - confirm before relying on this handler.
// NOTE(review): step 3 is not performed explicitly here; it may happen inside
// cloudbrain.GenerateTask - verify.
func CloudBrainRestart(ctx *context.Context) {
	ctx.Data["PageIsCloudBrain"] = true

	jobID := ctx.Params(":jobid")
	task, err := models.GetCloudbrainByJobID(jobID)
	if err != nil {
		log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error(), ctx.Data["MsgID"])
		ctx.RenderWithErr(err.Error(), tplCloudBrainIndex, nil)
		return
	}

	// A job may only be restarted once it is stopped, succeeded or failed.
	restartable := task.Status == string(models.JobStopped) ||
		task.Status == string(models.JobSucceeded) ||
		task.Status == string(models.JobFailed)
	if !restartable {
		log.Error("the job(%s) is not stopped", task.JobName, ctx.Data["MsgID"])
		ctx.RenderWithErr("the job is not stopped", tplCloudBrainIndex, nil)
		return
	}

	// Refuse the restart when the per-user count is already one or more.
	active, err := models.GetCloudbrainCountByUserID(ctx.User.ID)
	if err != nil {
		log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"])
		ctx.RenderWithErr("system error", tplCloudBrainIndex, nil)
		return
	}
	if active >= 1 {
		log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
		ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplCloudBrainIndex, nil)
		return
	}

	// Rebuild the task under the original job name so the paths line up.
	name := task.JobName
	codeDir := setting.JobPath + name + cloudbrain.CodeMountPath
	modelDir := getMinioPath(name, cloudbrain.ModelMountPath+"/")
	benchmarkDir := getMinioPath(name, cloudbrain.BenchMarkMountPath+"/")
	snn4imagenetDir := getMinioPath(name, cloudbrain.Snn4imagenetMountPath+"/")
	brainScoreDir := getMinioPath(name, cloudbrain.BrainScoreMountPath+"/")

	if err := cloudbrain.GenerateTask(ctx, name, image, cloudbrain.Command, task.Uuid, codeDir,
		modelDir, benchmarkDir, snn4imagenetDir, brainScoreDir,
		jobType, gpuQueue, resourceSpecId); err != nil {
		ctx.RenderWithErr(err.Error(), tplCloudBrainIndex, nil)
		return
	}

	ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob")
}

func CloudBrainShow(ctx *context.Context) {
ctx.Data["PageIsCloudBrain"] = true

@@ -423,7 +471,7 @@ func StopJobs(cloudBrains []*models.Cloudbrain) {
Action: models.ActionStop,
}
err := retry(3, time.Second*30, func() error {
_, err := modelarts.StopJob(taskInfo.JobID, param)
_, err := modelarts.ManageNotebook(taskInfo.JobID, param)
return err
})
logErrorAndUpdateJobStatus(err, taskInfo)


+ 35
- 9
routers/repo/modelarts.go View File

@@ -232,28 +232,54 @@ func NotebookDebug(ctx *context.Context) {
ctx.Redirect(debugUrl)
}

func NotebookStop(ctx *context.Context) {
func NotebookManage(ctx *context.Context) {
var jobID = ctx.Params(":jobid")
log.Info(jobID)
var action = ctx.Params(":action")

task, err := models.GetCloudbrainByJobID(jobID)
if err != nil {
ctx.ServerError("GetCloudbrainByJobID failed", err)
return
}

if task.Status != string(models.JobRunning) {
log.Error("the job(%s) is not running", task.JobName)
ctx.ServerError("the job is not running", errors.New("the job is not running"))
if action == models.ActionStop {
if task.Status != string(models.ModelArtsRunning) {
log.Error("the job(%s) is not running", task.JobName)
ctx.ServerError("the job is not running", errors.New("the job is not running"))
return
}
} else if action == models.ActionRestart {
if task.Status != string(models.ModelArtsStopped) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsCreateFailed) {
log.Error("the job(%s) is not stopped", task.JobName)
ctx.ServerError("the job is not running", errors.New("the job is not running"))
return
}

count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID)
if err != nil {
log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"])
ctx.RenderWithErr("system error", tplDebugJobIndex, nil)
return
} else {
if count >= 1 {
log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplDebugJobIndex, nil)
return
}
}
} else {
log.Error("the action(%s) is illegal", action)
ctx.ServerError("the action is illegal", errors.New("the action is illegal"))
return
}

param := models.NotebookAction{
Action: models.ActionStop,
Action: action,
}
res, err := modelarts.StopJob(jobID, param)
res, err := modelarts.ManageNotebook(jobID, param)
if err != nil {
log.Error("StopJob(%s) failed:%v", task.JobName, err.Error())
ctx.ServerError("StopJob failed", err)
log.Error("ManageNotebook(%s) failed:%v", task.JobName, err.Error())
ctx.ServerError("ManageNotebook failed", err)
return
}



+ 2
- 1
routers/routes/routes.go View File

@@ -968,6 +968,7 @@ func RegisterRoutes(m *macaron.Macaron) {
m.Post("/commit_image", cloudbrain.AdminOrOwnerOrJobCreaterRight, bindIgnErr(auth.CommitImageCloudBrainForm{}), repo.CloudBrainCommitImage)
m.Post("/stop", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.CloudBrainStop)
m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.CloudBrainDel)
m.Post("/restart", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.CloudBrainRestart)
m.Get("/rate", reqRepoCloudBrainReader, repo.GetRate)
m.Get("/models", reqRepoCloudBrainReader, repo.CloudBrainShowModels)
m.Get("/download_model", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.CloudBrainDownloadModel)
@@ -1002,7 +1003,7 @@ func RegisterRoutes(m *macaron.Macaron) {
m.Group("/:jobid", func() {
m.Get("", reqRepoCloudBrainReader, repo.NotebookShow)
m.Get("/debug", reqRepoCloudBrainWriter, repo.NotebookDebug)
m.Post("/stop", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookStop)
m.Post("/:action", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookManage)
m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookDel)
})
m.Get("/create", reqRepoCloudBrainWriter, repo.NotebookNew)


Loading…
Cancel
Save