From e65f269459e9822bf780aab1698bb0c365d3162d Mon Sep 17 00:00:00 2001 From: lewis <747342561@qq.com> Date: Fri, 31 Dec 2021 17:53:44 +0800 Subject: [PATCH 1/4] opt query times --- modules/cloudbrain/cloudbrain.go | 3 ++- modules/context/context.go | 1 + routers/repo/cloudbrain.go | 14 +------------- 3 files changed, 4 insertions(+), 14 deletions(-) diff --git a/modules/cloudbrain/cloudbrain.go b/modules/cloudbrain/cloudbrain.go index b8aa2e143..40da8a8c7 100755 --- a/modules/cloudbrain/cloudbrain.go +++ b/modules/cloudbrain/cloudbrain.go @@ -82,7 +82,7 @@ func AdminOrOwnerOrJobCreaterRight(ctx *context.Context) { var jobID = ctx.Params(":jobid") job, err := models.GetCloudbrainByJobID(jobID) - + ctx.Cloudbrain = job if !isAdminOrOwnerOrJobCreater(ctx, job, err) { ctx.NotFound(ctx.Req.URL.RequestURI(), nil) @@ -94,6 +94,7 @@ func AdminOrJobCreaterRight(ctx *context.Context) { var jobID = ctx.Params(":jobid") job, err := models.GetCloudbrainByJobID(jobID) + ctx.Cloudbrain = job if !isAdminOrJobCreater(ctx, job, err) { ctx.NotFound(ctx.Req.URL.RequestURI(), nil) diff --git a/modules/context/context.go b/modules/context/context.go index 5f09e190d..2046ab252 100755 --- a/modules/context/context.go +++ b/modules/context/context.go @@ -46,6 +46,7 @@ type Context struct { Repo *Repository Org *Organization + Cloudbrain *models.Cloudbrain } // IsUserSiteAdmin returns true if current user is a site admin diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index b24b4e90e..1a46bcbe0 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -369,19 +369,7 @@ func CloudBrainShow(ctx *context.Context) { } func CloudBrainDebug(ctx *context.Context) { - var jobID = ctx.Params(":jobid") - if !ctx.IsSigned { - log.Error("the user has not signed in") - ctx.Error(http.StatusForbidden, "", "the user has not signed in") - return - } - task, err := models.GetCloudbrainByJobID(jobID) - if err != nil { - ctx.ServerError("GetCloudbrainByJobID failed", err) - return - } - - debugUrl := setting.DebugServerHost + "jpylab_" + task.JobID + "_" + task.SubTaskName + debugUrl := setting.DebugServerHost + "jpylab_" + ctx.Cloudbrain.JobID + "_" + ctx.Cloudbrain.SubTaskName ctx.Redirect(debugUrl) } From ff861b987f10ecf4cfb8af545c430d344376b04b Mon Sep 17 00:00:00 2001 From: lewis <747342561@qq.com> Date: Tue, 4 Jan 2022 15:52:33 +0800 Subject: [PATCH 2/4] opt --- routers/repo/cloudbrain.go | 52 +++++++------------------------------- 1 file changed, 9 insertions(+), 43 deletions(-) diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index 1a46bcbe0..000b6c83c 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -253,15 +253,8 @@ func CloudBrainRestart(ctx *context.Context) { var errorMsg = "" var status = "" + task := ctx.Cloudbrain for { - task, err := models.GetCloudbrainByJobID(jobID) - if err != nil { - log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error(), ctx.Data["MsgID"]) - resultCode = "-1" - errorMsg = "system error" - break - } - if task.Status != string(models.JobStopped) && task.Status != string(models.JobSucceeded) && task.Status != string(models.JobFailed) { log.Error("the job(%s) is not stopped", task.JobName, ctx.Data["MsgID"]) resultCode = "-1" @@ -374,29 +367,14 @@ func CloudBrainDebug(ctx *context.Context) { } func CloudBrainCommitImage(ctx *context.Context, form auth.CommitImageCloudBrainForm) { - var jobID = ctx.Params(":jobid") - if !ctx.IsSigned { - log.Error("the user has not signed in") - ctx.Error(http.StatusForbidden, "", "the user has not signed in") - return - } - task, err := models.GetCloudbrainByJobID(jobID) - if err != nil { - ctx.JSON(200, map[string]string{ - "result_code": "-1", - "error_msg": "GetCloudbrainByJobID failed", - }) - return - } - - err = cloudbrain.CommitImage(jobID, models.CommitImageParams{ - Ip: task.ContainerIp, - TaskContainerId: task.ContainerID, + err := cloudbrain.CommitImage(ctx.Cloudbrain.JobID, models.CommitImageParams{ + Ip: ctx.Cloudbrain.ContainerIp, + TaskContainerId: ctx.Cloudbrain.ContainerID, ImageDescription: form.Description, ImageTag: form.Tag, }) if err != nil { - log.Error("CommitImage(%s) failed:%v", task.JobName, err.Error(), ctx.Data["msgID"]) + log.Error("CommitImage(%s) failed:%v", ctx.Cloudbrain.JobName, err.Error(), ctx.Data["msgID"]) ctx.JSON(200, map[string]string{ "result_code": "-1", "error_msg": "CommitImage failed", @@ -416,15 +394,8 @@ func CloudBrainStop(ctx *context.Context) { var errorMsg = "" var status = "" + task := ctx.Cloudbrain for { - task, err := models.GetCloudbrainByJobID(jobID) - if err != nil { - log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err, ctx.Data["msgID"]) - resultCode = "-1" - errorMsg = "system error" - break - } - if task.Status == string(models.JobStopped) || task.Status == string(models.JobFailed) { log.Error("the job(%s) has been stopped", task.JobName, ctx.Data["msgID"]) resultCode = "-1" @@ -432,7 +403,7 @@ func CloudBrainStop(ctx *context.Context) { break } - err = cloudbrain.StopJob(jobID) + err := cloudbrain.StopJob(jobID) if err != nil { log.Error("StopJob(%s) failed:%v", task.JobName, err, ctx.Data["msgID"]) resultCode = "-1" @@ -542,12 +513,7 @@ func logErrorAndUpdateJobStatus(err error, taskInfo *models.Cloudbrain) { } func CloudBrainDel(ctx *context.Context) { - var jobID = ctx.Params(":jobid") - task, err := models.GetCloudbrainByJobID(jobID) - if err != nil { - ctx.ServerError("GetCloudbrainByJobID failed", err) - return - } + task := ctx.Cloudbrain if task.Status != string(models.JobStopped) && task.Status != string(models.JobFailed) { log.Error("the job(%s) has not been stopped", task.JobName, ctx.Data["msgID"]) @@ -555,7 +521,7 @@ func CloudBrainDel(ctx *context.Context) { return } - err = models.DeleteJob(task) + err := models.DeleteJob(task) if err != nil { ctx.ServerError("DeleteJob failed", err) return From a095a7986301a85b7f46d33d809f34b6418c896b Mon Sep 17 00:00:00 2001 From: lewis <747342561@qq.com> Date: Tue, 4 Jan 2022 16:24:23 +0800 Subject: [PATCH 3/4] reduce query times --- routers/repo/modelarts.go | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index a907aca01..071d83883 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -192,11 +192,6 @@ func NotebookShow(ctx *context.Context) { func NotebookDebug(ctx *context.Context) { var jobID = ctx.Params(":jobid") - _, err := models.GetCloudbrainByJobID(jobID) - if err != nil { - ctx.ServerError("GetCloudbrainByJobID failed", err) - return - } result, err := modelarts.GetJob(jobID) if err != nil { @@ -325,11 +320,7 @@ func NotebookManage(ctx *context.Context) { func NotebookDel(ctx *context.Context) { var jobID = ctx.Params(":jobid") - task, err := models.GetCloudbrainByJobID(jobID) - if err != nil { - ctx.ServerError("GetCloudbrainByJobID failed", err) - return - } + task := ctx.Cloudbrain if task.Status != string(models.ModelArtsCreateFailed) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsStopped) { log.Error("the job(%s) has not been stopped", task.JobName) @@ -337,7 +328,7 @@ func NotebookDel(ctx *context.Context) { return } - _, err = modelarts.DelNotebook(jobID) + _, err := modelarts.DelNotebook(jobID) if err != nil { log.Error("DelJob(%s) failed:%v", task.JobName, err.Error()) ctx.ServerError("DelJob failed", err) @@ -1421,14 +1412,9 @@ func TrainJobDel(ctx *context.Context) { func TrainJobStop(ctx *context.Context) { var jobID = ctx.Params(":jobid") - task, err := models.GetCloudbrainByJobID(jobID) - if err != nil { - log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) - ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) - return - } + task := ctx.Cloudbrain - _, err = modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10)) + _, err := modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10)) if err != nil { log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error()) ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) From 15f1289b6bb013a9789f15fb77ef5d151f9b992f Mon Sep 17 00:00:00 2001 From: lewis <747342561@qq.com> Date: Wed, 5 Jan 2022 11:30:25 +0800 Subject: [PATCH 4/4] restart opt --- models/cloudbrain.go | 25 +++++++++++++++++++++++++ modules/cloudbrain/cloudbrain.go | 27 +++++++++++++++++++++------ routers/repo/cloudbrain.go | 7 ++----- 3 files changed, 48 insertions(+), 11 deletions(-) diff --git a/models/cloudbrain.go b/models/cloudbrain.go index c3707c9e2..efaa9ffeb 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -1210,3 +1210,28 @@ func GetCloudbrainTrainJobCountByUserID(userID int64) (int, error) { And("job_type = ? and user_id = ? and type = ?", JobTypeTrain, userID, TypeCloudBrainTwo).Count(new(Cloudbrain)) return int(count), err } + +func RestartCloudbrain(old *Cloudbrain, new *Cloudbrain) (err error) { + sess := x.NewSession() + defer sess.Close() + + if err = sess.Begin(); err != nil { + return err + } + + if _, err = sess.Delete(old); err != nil { + sess.Rollback() + return err + } + + if _, err = sess.Insert(new); err != nil { + sess.Rollback() + return err + } + + if err = sess.Commit(); err != nil { + return err + } + + return nil +} diff --git a/modules/cloudbrain/cloudbrain.go b/modules/cloudbrain/cloudbrain.go index 40da8a8c7..74dcbe7b0 100755 --- a/modules/cloudbrain/cloudbrain.go +++ b/modules/cloudbrain/cloudbrain.go @@ -223,7 +223,7 @@ func GenerateTask(ctx *context.Context, jobName, image, command, uuid, codePath, return nil } -func RestartTask(ctx *context.Context, task *models.Cloudbrain) error { +func RestartTask(ctx *context.Context, task *models.Cloudbrain, newJobID *string) error { dataActualPath := setting.Attachment.Minio.RealPath + setting.Attachment.Minio.Bucket + "/" + setting.Attachment.Minio.BasePath + @@ -313,7 +313,7 @@ func RestartTask(ctx *context.Context, task *models.Cloudbrain) error { }, }) if err != nil { - log.Error("CreateJob failed:", err.Error(), ctx.Data["MsgID"]) + log.Error("CreateJob failed:%v", err.Error(), ctx.Data["MsgID"]) return err } if jobResult.Code != Success { @@ -322,14 +322,29 @@ func RestartTask(ctx *context.Context, task *models.Cloudbrain) error { } var jobID = jobResult.Payload["jobId"].(string) - task.JobID = jobID - task.Status = string(models.JobWaiting) - err = models.UpdateJob(task) + newTask := &models.Cloudbrain{ + Status: string(models.JobWaiting), + UserID: task.UserID, + RepoID: task.RepoID, + JobID: jobID, + JobName: task.JobName, + SubTaskName: task.SubTaskName, + JobType: task.JobType, + Type: task.Type, + Uuid: task.Uuid, + Image: task.Image, + GpuQueue: task.GpuQueue, + ResourceSpecId: task.ResourceSpecId, + ComputeResource: task.ComputeResource, + } + err = models.RestartCloudbrain(task, newTask) if err != nil { - log.Error("UpdateJob(%s) failed:%v", jobName, err.Error(), ctx.Data["MsgID"]) + log.Error("RestartCloudbrain(%s) failed:%v", jobName, err.Error(), ctx.Data["MsgID"]) return err } + *newJobID = jobID + return nil } diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index 000b6c83c..c8818d9ba 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -251,7 +251,7 @@ func CloudBrainRestart(ctx *context.Context) { var jobID = ctx.Params(":jobid") var resultCode = "0" var errorMsg = "" - var status = "" + var status = string(models.JobWaiting) task := ctx.Cloudbrain for { @@ -291,7 +291,7 @@ func CloudBrainRestart(ctx *context.Context) { } } - err = cloudbrain.RestartTask(ctx, task) + err = cloudbrain.RestartTask(ctx, task, &jobID) if err != nil { log.Error("RestartTask failed:%v", err.Error(), ctx.Data["MsgID"]) resultCode = "-1" @@ -299,9 +299,6 @@ func CloudBrainRestart(ctx *context.Context) { break } - status = task.Status - jobID = task.JobID - break }