| @@ -174,7 +174,7 @@ sendjob: | |||
| return &result, nil | |||
| } | |||
| func StopJob(jobID string, param models.NotebookAction) (*models.NotebookActionResult, error) { | |||
| func ManageNotebook(jobID string, param models.NotebookAction) (*models.NotebookActionResult, error) { | |||
| checkSetting() | |||
| client := getRestyClient() | |||
| var result models.NotebookActionResult | |||
| @@ -207,8 +207,8 @@ sendjob: | |||
| } | |||
| if len(response.ErrorCode) != 0 { | |||
| log.Error("StopJob failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
| return &result, fmt.Errorf("StopJob failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
| log.Error("ManageNotebook failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
| return &result, fmt.Errorf("ManageNotebook failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
| } | |||
| return &result, nil | |||
| @@ -247,6 +247,54 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||
| ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob") | |||
| } | |||
| func CloudBrainRestart(ctx *context.Context) { | |||
| ctx.Data["PageIsCloudBrain"] = true | |||
| /* | |||
| 1、查询job,判断status | |||
| 2、利用查询出来的配置重新启动一个debug环境(使用相同的名称) | |||
| 3、更新此任务的状态 | |||
| */ | |||
| var jobID = ctx.Params(":jobid") | |||
| task, err := models.GetCloudbrainByJobID(jobID) | |||
| if err != nil { | |||
| log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error(), ctx.Data["MsgID"]) | |||
| ctx.RenderWithErr(err.Error(), tplCloudBrainIndex, nil) | |||
| return | |||
| } | |||
| if task.Status != string(models.JobStopped) && task.Status != string(models.JobSucceeded) && task.Status != string(models.JobFailed) { | |||
| log.Error("the job(%s) is not stopped", task.JobName, ctx.Data["MsgID"]) | |||
| ctx.RenderWithErr("the job is not stopped", tplCloudBrainIndex, nil) | |||
| return | |||
| } | |||
| count, err := models.GetCloudbrainCountByUserID(ctx.User.ID) | |||
| if err != nil { | |||
| log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"]) | |||
| ctx.RenderWithErr("system error", tplCloudBrainIndex, nil) | |||
| return | |||
| } else { | |||
| if count >= 1 { | |||
| log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) | |||
| ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplCloudBrainIndex, nil) | |||
| return | |||
| } | |||
| } | |||
| jobName := task.JobName | |||
| codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath | |||
| err = cloudbrain.GenerateTask(ctx, jobName, image, cloudbrain.Command, task.Uuid, codePath, getMinioPath(jobName, cloudbrain.ModelMountPath+"/"), | |||
| getMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), getMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), | |||
| getMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), jobType, gpuQueue, resourceSpecId) | |||
| if err != nil { | |||
| ctx.RenderWithErr(err.Error(), tplCloudBrainIndex, nil) | |||
| return | |||
| } | |||
| ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob") | |||
| } | |||
| func CloudBrainShow(ctx *context.Context) { | |||
| ctx.Data["PageIsCloudBrain"] = true | |||
| @@ -423,7 +471,7 @@ func StopJobs(cloudBrains []*models.Cloudbrain) { | |||
| Action: models.ActionStop, | |||
| } | |||
| err := retry(3, time.Second*30, func() error { | |||
| _, err := modelarts.StopJob(taskInfo.JobID, param) | |||
| _, err := modelarts.ManageNotebook(taskInfo.JobID, param) | |||
| return err | |||
| }) | |||
| logErrorAndUpdateJobStatus(err, taskInfo) | |||
| @@ -232,28 +232,54 @@ func NotebookDebug(ctx *context.Context) { | |||
| ctx.Redirect(debugUrl) | |||
| } | |||
| func NotebookStop(ctx *context.Context) { | |||
| func NotebookManage(ctx *context.Context) { | |||
| var jobID = ctx.Params(":jobid") | |||
| log.Info(jobID) | |||
| var action = ctx.Params(":action") | |||
| task, err := models.GetCloudbrainByJobID(jobID) | |||
| if err != nil { | |||
| ctx.ServerError("GetCloudbrainByJobID failed", err) | |||
| return | |||
| } | |||
| if task.Status != string(models.JobRunning) { | |||
| log.Error("the job(%s) is not running", task.JobName) | |||
| ctx.ServerError("the job is not running", errors.New("the job is not running")) | |||
| if action == models.ActionStop { | |||
| if task.Status != string(models.ModelArtsRunning) { | |||
| log.Error("the job(%s) is not running", task.JobName) | |||
| ctx.ServerError("the job is not running", errors.New("the job is not running")) | |||
| return | |||
| } | |||
| } else if action == models.ActionRestart { | |||
| if task.Status != string(models.ModelArtsStopped) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsCreateFailed) { | |||
| log.Error("the job(%s) is not stopped", task.JobName) | |||
| ctx.ServerError("the job is not running", errors.New("the job is not running")) | |||
| return | |||
| } | |||
| count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID) | |||
| if err != nil { | |||
| log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"]) | |||
| ctx.RenderWithErr("system error", tplDebugJobIndex, nil) | |||
| return | |||
| } else { | |||
| if count >= 1 { | |||
| log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) | |||
| ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplDebugJobIndex, nil) | |||
| return | |||
| } | |||
| } | |||
| } else { | |||
| log.Error("the action(%s) is illegal", action) | |||
| ctx.ServerError("the action is illegal", errors.New("the action is illegal")) | |||
| return | |||
| } | |||
| param := models.NotebookAction{ | |||
| Action: models.ActionStop, | |||
| Action: action, | |||
| } | |||
| res, err := modelarts.StopJob(jobID, param) | |||
| res, err := modelarts.ManageNotebook(jobID, param) | |||
| if err != nil { | |||
| log.Error("StopJob(%s) failed:%v", task.JobName, err.Error()) | |||
| ctx.ServerError("StopJob failed", err) | |||
| log.Error("ManageNotebook(%s) failed:%v", task.JobName, err.Error()) | |||
| ctx.ServerError("ManageNotebook failed", err) | |||
| return | |||
| } | |||
| @@ -968,6 +968,7 @@ func RegisterRoutes(m *macaron.Macaron) { | |||
| m.Post("/commit_image", cloudbrain.AdminOrOwnerOrJobCreaterRight, bindIgnErr(auth.CommitImageCloudBrainForm{}), repo.CloudBrainCommitImage) | |||
| m.Post("/stop", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.CloudBrainStop) | |||
| m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.CloudBrainDel) | |||
| m.Post("/restart", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.CloudBrainRestart) | |||
| m.Get("/rate", reqRepoCloudBrainReader, repo.GetRate) | |||
| m.Get("/models", reqRepoCloudBrainReader, repo.CloudBrainShowModels) | |||
| m.Get("/download_model", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.CloudBrainDownloadModel) | |||
| @@ -1002,7 +1003,7 @@ func RegisterRoutes(m *macaron.Macaron) { | |||
| m.Group("/:jobid", func() { | |||
| m.Get("", reqRepoCloudBrainReader, repo.NotebookShow) | |||
| m.Get("/debug", reqRepoCloudBrainWriter, repo.NotebookDebug) | |||
| m.Post("/stop", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookStop) | |||
| m.Post("/:action", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookManage) | |||
| m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookDel) | |||
| }) | |||
| m.Get("/create", reqRepoCloudBrainWriter, repo.NotebookNew) | |||