| @@ -174,7 +174,7 @@ sendjob: | |||||
| return &result, nil | return &result, nil | ||||
| } | } | ||||
| func StopJob(jobID string, param models.NotebookAction) (*models.NotebookActionResult, error) { | |||||
| func ManageNotebook(jobID string, param models.NotebookAction) (*models.NotebookActionResult, error) { | |||||
| checkSetting() | checkSetting() | ||||
| client := getRestyClient() | client := getRestyClient() | ||||
| var result models.NotebookActionResult | var result models.NotebookActionResult | ||||
| @@ -207,8 +207,8 @@ sendjob: | |||||
| } | } | ||||
| if len(response.ErrorCode) != 0 { | if len(response.ErrorCode) != 0 { | ||||
| log.Error("StopJob failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||||
| return &result, fmt.Errorf("StopJob failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||||
| log.Error("ManageNotebook failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||||
| return &result, fmt.Errorf("ManageNotebook failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||||
| } | } | ||||
| return &result, nil | return &result, nil | ||||
| @@ -247,6 +247,54 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||||
| ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob") | ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob") | ||||
| } | } | ||||
| func CloudBrainRestart(ctx *context.Context) { | |||||
| ctx.Data["PageIsCloudBrain"] = true | |||||
| /* | |||||
| 1、查询job,判断status | |||||
| 2、利用查询出来的配置重新启动一个debug环境(使用相同的名称) | |||||
| 3、更新此任务的状态 | |||||
| */ | |||||
| var jobID = ctx.Params(":jobid") | |||||
| task, err := models.GetCloudbrainByJobID(jobID) | |||||
| if err != nil { | |||||
| log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error(), ctx.Data["MsgID"]) | |||||
| ctx.RenderWithErr(err.Error(), tplCloudBrainIndex, nil) | |||||
| return | |||||
| } | |||||
| if task.Status != string(models.JobStopped) && task.Status != string(models.JobSucceeded) && task.Status != string(models.JobFailed) { | |||||
| log.Error("the job(%s) is not stopped", task.JobName, ctx.Data["MsgID"]) | |||||
| ctx.RenderWithErr("the job is not stopped", tplCloudBrainIndex, nil) | |||||
| return | |||||
| } | |||||
| count, err := models.GetCloudbrainCountByUserID(ctx.User.ID) | |||||
| if err != nil { | |||||
| log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"]) | |||||
| ctx.RenderWithErr("system error", tplCloudBrainIndex, nil) | |||||
| return | |||||
| } else { | |||||
| if count >= 1 { | |||||
| log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) | |||||
| ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplCloudBrainIndex, nil) | |||||
| return | |||||
| } | |||||
| } | |||||
| jobName := task.JobName | |||||
| codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath | |||||
| err = cloudbrain.GenerateTask(ctx, jobName, image, cloudbrain.Command, task.Uuid, codePath, getMinioPath(jobName, cloudbrain.ModelMountPath+"/"), | |||||
| getMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), getMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), | |||||
| getMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), jobType, gpuQueue, resourceSpecId) | |||||
| if err != nil { | |||||
| ctx.RenderWithErr(err.Error(), tplCloudBrainIndex, nil) | |||||
| return | |||||
| } | |||||
| ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob") | |||||
| } | |||||
| func CloudBrainShow(ctx *context.Context) { | func CloudBrainShow(ctx *context.Context) { | ||||
| ctx.Data["PageIsCloudBrain"] = true | ctx.Data["PageIsCloudBrain"] = true | ||||
| @@ -423,7 +471,7 @@ func StopJobs(cloudBrains []*models.Cloudbrain) { | |||||
| Action: models.ActionStop, | Action: models.ActionStop, | ||||
| } | } | ||||
| err := retry(3, time.Second*30, func() error { | err := retry(3, time.Second*30, func() error { | ||||
| _, err := modelarts.StopJob(taskInfo.JobID, param) | |||||
| _, err := modelarts.ManageNotebook(taskInfo.JobID, param) | |||||
| return err | return err | ||||
| }) | }) | ||||
| logErrorAndUpdateJobStatus(err, taskInfo) | logErrorAndUpdateJobStatus(err, taskInfo) | ||||
| @@ -232,28 +232,54 @@ func NotebookDebug(ctx *context.Context) { | |||||
| ctx.Redirect(debugUrl) | ctx.Redirect(debugUrl) | ||||
| } | } | ||||
| func NotebookStop(ctx *context.Context) { | |||||
| func NotebookManage(ctx *context.Context) { | |||||
| var jobID = ctx.Params(":jobid") | var jobID = ctx.Params(":jobid") | ||||
| log.Info(jobID) | |||||
| var action = ctx.Params(":action") | |||||
| task, err := models.GetCloudbrainByJobID(jobID) | task, err := models.GetCloudbrainByJobID(jobID) | ||||
| if err != nil { | if err != nil { | ||||
| ctx.ServerError("GetCloudbrainByJobID failed", err) | ctx.ServerError("GetCloudbrainByJobID failed", err) | ||||
| return | return | ||||
| } | } | ||||
| if task.Status != string(models.JobRunning) { | |||||
| log.Error("the job(%s) is not running", task.JobName) | |||||
| ctx.ServerError("the job is not running", errors.New("the job is not running")) | |||||
| if action == models.ActionStop { | |||||
| if task.Status != string(models.ModelArtsRunning) { | |||||
| log.Error("the job(%s) is not running", task.JobName) | |||||
| ctx.ServerError("the job is not running", errors.New("the job is not running")) | |||||
| return | |||||
| } | |||||
| } else if action == models.ActionRestart { | |||||
| if task.Status != string(models.ModelArtsStopped) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsCreateFailed) { | |||||
| log.Error("the job(%s) is not stopped", task.JobName) | |||||
| ctx.ServerError("the job is not running", errors.New("the job is not running")) | |||||
| return | |||||
| } | |||||
| count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID) | |||||
| if err != nil { | |||||
| log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"]) | |||||
| ctx.RenderWithErr("system error", tplDebugJobIndex, nil) | |||||
| return | |||||
| } else { | |||||
| if count >= 1 { | |||||
| log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) | |||||
| ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplDebugJobIndex, nil) | |||||
| return | |||||
| } | |||||
| } | |||||
| } else { | |||||
| log.Error("the action(%s) is illegal", action) | |||||
| ctx.ServerError("the action is illegal", errors.New("the action is illegal")) | |||||
| return | return | ||||
| } | } | ||||
| param := models.NotebookAction{ | param := models.NotebookAction{ | ||||
| Action: models.ActionStop, | |||||
| Action: action, | |||||
| } | } | ||||
| res, err := modelarts.StopJob(jobID, param) | |||||
| res, err := modelarts.ManageNotebook(jobID, param) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("StopJob(%s) failed:%v", task.JobName, err.Error()) | |||||
| ctx.ServerError("StopJob failed", err) | |||||
| log.Error("ManageNotebook(%s) failed:%v", task.JobName, err.Error()) | |||||
| ctx.ServerError("ManageNotebook failed", err) | |||||
| return | return | ||||
| } | } | ||||
| @@ -968,6 +968,7 @@ func RegisterRoutes(m *macaron.Macaron) { | |||||
| m.Post("/commit_image", cloudbrain.AdminOrOwnerOrJobCreaterRight, bindIgnErr(auth.CommitImageCloudBrainForm{}), repo.CloudBrainCommitImage) | m.Post("/commit_image", cloudbrain.AdminOrOwnerOrJobCreaterRight, bindIgnErr(auth.CommitImageCloudBrainForm{}), repo.CloudBrainCommitImage) | ||||
| m.Post("/stop", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.CloudBrainStop) | m.Post("/stop", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.CloudBrainStop) | ||||
| m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.CloudBrainDel) | m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.CloudBrainDel) | ||||
| m.Post("/restart", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.CloudBrainRestart) | |||||
| m.Get("/rate", reqRepoCloudBrainReader, repo.GetRate) | m.Get("/rate", reqRepoCloudBrainReader, repo.GetRate) | ||||
| m.Get("/models", reqRepoCloudBrainReader, repo.CloudBrainShowModels) | m.Get("/models", reqRepoCloudBrainReader, repo.CloudBrainShowModels) | ||||
| m.Get("/download_model", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.CloudBrainDownloadModel) | m.Get("/download_model", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.CloudBrainDownloadModel) | ||||
| @@ -1002,7 +1003,7 @@ func RegisterRoutes(m *macaron.Macaron) { | |||||
| m.Group("/:jobid", func() { | m.Group("/:jobid", func() { | ||||
| m.Get("", reqRepoCloudBrainReader, repo.NotebookShow) | m.Get("", reqRepoCloudBrainReader, repo.NotebookShow) | ||||
| m.Get("/debug", reqRepoCloudBrainWriter, repo.NotebookDebug) | m.Get("/debug", reqRepoCloudBrainWriter, repo.NotebookDebug) | ||||
| m.Post("/stop", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookStop) | |||||
| m.Post("/:action", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookManage) | |||||
| m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookDel) | m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookDel) | ||||
| }) | }) | ||||
| m.Get("/create", reqRepoCloudBrainWriter, repo.NotebookNew) | m.Get("/create", reqRepoCloudBrainWriter, repo.NotebookNew) | ||||